xmerge/source/pocketword/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/PocketWordDocument.java


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404

/*************************************************************************
 *
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * Copyright 2000, 2010 Oracle and/or its affiliates.
 *
 * OpenOffice.org - a multi-platform office productivity suite
 *
 * This file is part of OpenOffice.org.
 *
 * OpenOffice.org is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License version 3
 * only, as published by the Free Software Foundation.
 *
 * OpenOffice.org is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License version 3 for more details
 * (a copy is included in the LICENSE file that accompanied this code).
 *
 * You should have received a copy of the GNU Lesser General Public License
 * version 3 along with OpenOffice.org.  If not, see
 * <http://www.openoffice.org/license.html>
 * for a copy of the LGPLv3 License.
 *
 ************************************************************************/

package org.openoffice.xmerge.converter.xml.sxw.pocketword;

import org.openoffice.xmerge.Document;
import org.openoffice.xmerge.converter.xml.ParaStyle;
import org.openoffice.xmerge.converter.xml.TextStyle;

import java.io.InputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;

import java.util.Enumeration;
import java.util.Vector;


/**
 * <p>Class representing a Pocket Word Document.</p>
 *
 * <p><code>PocketWordDocument</code> is used to create new Pocket Word documents
 *    and to read existing data to allow for conversion to OpenOffice Writer
 *    format.</p>
 *
 * <p>Instances are mutable and perform no synchronization of their own.</p>
 *
 * @author  Mark Murnane
 * @version 1.1
 */
public class PocketWordDocument implements Document, PocketWordConstants {

    /** Size in bytes of one entry in the font table that follows the preamble. */
    private static final int FONT_ENTRY_LENGTH = 80;

    /** Number of leading bytes of a font entry decoded as the UTF-16LE font name. */
    private static final int FONT_NAME_LENGTH = 64;

    /**
     * Distance in bytes between the start of a paragraph and the 4-byte word
     * whose last two bytes are the <code>0xFF 0xFF</code> paragraph marker.
     */
    private static final int MARKER_WORD_OFFSET = 8;

    /** Number of padding bytes that may precede the trailer. */
    private static final int TRAILER_PADDING_LENGTH = 2;

    private String      docName;

    private byte[] preamble;

    /** Names (<code>String</code>) of the fonts found by {@link #read(InputStream)}. */
    private Vector fonts;
    private DocumentDescriptor descriptor;

    /** The <code>Paragraph</code> objects of this document, in document order. */
    private Vector paragraphs;

    private ParaStyle   pStyle;
    private Paragraph   currentPara;

    /*
     * The trailer currently appears to be constant, but if it is found to
     * have a variable component, then this initialisation should be moved
     * to an initTrailer() method.
     *
     * Padding is sometimes needed before the trailer to ensure the file
     * ends on a 4-byte boundary, but this is handled in write().
     */
    private static final byte[] trailer = new byte[] { (byte)0x82, 0x00,
                                                             0x09, 0x00,
                                                             0x03, 0x00,
                                                             (byte)0x82, 0x00,
                                                             0x00, 0x00,
                                                             0x00, 0x00,
                                                             0x00, 0x00,
                                                             0x00, 0x00,
                                                             0x00, 0x00 };


    /**
     * <p>Constructs a new Pocket Word Document.</p>
     *
     * <p>This new document does not contain any information.  Document data must
     *    either be added using appropriate methods, or an existing file can be
     *    {@link #read(InputStream) read} from an <code>InputStream</code>.</p>
     *
     * @param   name    The name of the <code>PocketWordDocument</code>.  A
     *                  trailing file extension, if present, is removed.
     */
    public PocketWordDocument(String name) {

        docName = trimDocumentName(name);

        preamble   = new byte[52];
        fonts      = new Vector(0, 1);
        descriptor = new DocumentDescriptor();
        paragraphs = new Vector(0, 1);
    }


    /**
     * <p>This method reads <code>byte</code> data from the InputStream and
     *    extracts font and paragraph data from the file.</p>
     *
     * <p>If the stream is already at end-of-file, an error message is printed
     *    to <code>System.err</code> and the document is left unchanged.</p>
     *
     * @param   docData         InputStream containing a Pocket Word data file.
     *
     * @throws  IOException     In case of any I/O errors, if
     *                          <code>docData</code> is <code>null</code>, or if
     *                          the data is truncated or malformed.
     */
    public void read(InputStream docData) throws IOException {

        if (docData == null) {
            throw new IOException ("No input stream to convert");
        }

        // The preamble may become important for font declarations.
        int preambleBytes = readFully(docData, preamble);
        // #i33702# check for an empty InputStream.
        if (preambleBytes == 0) {
            System.err.println("Error:invalid input stream");
            return;
        }
        if (preambleBytes < preamble.length) {
            throw new IOException("Truncated Pocket Word data: incomplete preamble");
        }

        // Font entries are fixed-size; the last one carries a terminator.
        // Every entry must be read completely, otherwise a truncated stream
        // would leave the buffer unchanged and the loop would never terminate.
        byte[] font = new byte[FONT_ENTRY_LENGTH];
        do {
            if (readFully(docData, font) < font.length) {
                throw new IOException("Truncated Pocket Word data: incomplete font table");
            }

            String name = new String(font, 0, FONT_NAME_LENGTH, "UTF-16LE");
            fonts.add(name.trim());

        } while (!isLastFontEntry(font));

        /*
         * TODO:  The document descriptor data that follows the fonts ends with
         *        a variable section containing data for each of the paragraphs.
         *        It may be possible to use this information to calculate starting
         *        positions for each paragraph rather than iterating through the
         *        entire byte stream.
         */

        // Slurp everything that follows the font table.
        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        byte[] chunk = new byte[4096];
        int count;
        while ((count = docData.read(chunk)) != -1) {
            bos.write(chunk, 0, count);
        }

        byte[] contentData = bos.toByteArray();
        if (contentData.length <= trailer.length) {
            throw new IOException("Truncated Pocket Word data: no paragraph data");
        }

        /*
         * Scan the content one 4-byte word at a time.  Every word carrying the
         * paragraph marker closes the paragraph that is currently open (if any)
         * and opens the next one.  The bound keeps both marker bytes inside the
         * array even when the content length is not a multiple of four.
         */
        int start = 0;
        boolean sawMarker = false;
        for (int i = 0; i + 3 < contentData.length; i += 4) {
            if (!hasParagraphMarker(contentData, i)) {
                continue;
            }

            if (sawMarker) {
                paragraphs.add(new Paragraph(
                        copyRange(contentData, start, i - MARKER_WORD_OFFSET)));
            }
            start = i - MARKER_WORD_OFFSET;
            sawMarker = true;
        }

        /*
         * Special case, the last paragraph.
         * It runs from the last start position (offset 0 if no marker was
         * seen at all) up to the trailer.  A zero byte directly in front of
         * the trailer is taken to mean that padding was written as well.
         */
        int trailing = trailer.length;
        if (contentData[contentData.length - trailer.length - 1] == 0) {
            trailing += TRAILER_PADDING_LENGTH;
        }
        paragraphs.add(new Paragraph(
                copyRange(contentData, start, contentData.length - trailing)));
    }


    /*
     * Reads from the stream until the buffer is full or end-of-file is
     * reached.  Returns the number of bytes actually stored in the buffer,
     * which is smaller than buffer.length only if the stream ended early.
     */
    private static int readFully(InputStream in, byte[] buffer)
            throws IOException {
        int total = 0;
        while (total < buffer.length) {
            int count = in.read(buffer, total, buffer.length - total);
            if (count < 0) {
                break;
            }
            total += count;
        }
        return total;
    }


    /*
     * Tests whether the last four bytes of a font entry hold the sequence
     * 05 00 01 00, which ends the font table.
     */
    private static boolean isLastFontEntry(byte[] font) {
        return font[76] == 5 && font[77] == 0
                    && font[78] == 1 && font[79] == 0;
    }


    /*
     * Tests whether the 4-byte word at the given offset ends in FF FF.
     * The caller guarantees that offset + 3 is a valid index.
     */
    private static boolean hasParagraphMarker(byte[] data, int offset) {
        return data[offset + 2] == (byte)0xFF && data[offset + 3] == (byte)0xFF;
    }


    /*
     * Returns a copy of data[from..to).  An invalid range can only be the
     * result of malformed input, so it is reported as an IOException rather
     * than as an unchecked index error.
     */
    private static byte[] copyRange(byte[] data, int from, int to)
            throws IOException {
        if (from < 0 || to < from || to > data.length) {
            throw new IOException("Malformed Pocket Word data: invalid paragraph bounds");
        }
        byte[] copy = new byte[to - from];
        System.arraycopy(data, from, copy, 0, copy.length);
        return copy;
    }


    /*
     * Utility method to make sure the document name is stripped of any file
     * extensions before use.  The extension is matched case-insensitively and
     * independently of the default locale.
     */
    private String trimDocumentName(String name) {
        int extensionLength = FILE_EXTENSION.length();
        int baseLength = name.length() - extensionLength;

        // regionMatches() returns false for a negative offset, i.e. when the
        // name is shorter than the extension.
        if (name.regionMatches(true, baseLength, FILE_EXTENSION, 0, extensionLength)) {
            return name.substring(0, baseLength);
        }

        return name;
    }


    /**
     * <p>Method to provide access to all of the <code>Paragraph</code> objects
     *    in the <code>Document</code>.</p>
     *
     * @return <code>Enumeration</code> over the paragraphs in the document.
     */
    public Enumeration getParagraphEnumeration() {
        return paragraphs.elements();
    }


    /**
     * <p>Returns the <code>Document</code> name with no file extension.</p>
     *
     * @return  The <code>Document</code> name with no file extension.
     */
    public String getName() {
        return docName;
    }


    /**
     * <p>Returns the <code>Document</code> name with file extension.</p>
     *
     * @return  The <code>Document</code> name with file extension.
     */
    public String getFileName() {
        return docName + FILE_EXTENSION;
    }


    /**
     * <p>Writes out the <code>Document</code> content to the specified
     * <code>OutputStream</code>.</p>
     *
     * <p>The output consists of the fixed preamble, the fixed font table, the
     * document descriptor, the data of every paragraph and the trailer.  The
     * stream is flushed and closed before this method returns.</p>
     *
     * <p>This method may not be thread-safe.
     * Implementations may or may not synchronize this
     * method.  User code (i.e. caller) must make sure that
     * calls to this method are thread-safe.</p>
     *
     * @param  os  <code>OutputStream</code> to write out the
     *             <code>Document</code> content.
     *
     * @throws  IOException  If any I/O error occurs.
     */
    public void write(OutputStream os) throws IOException {
        DataOutputStream dos = new DataOutputStream(os);

        initPreamble();
        dos.write(preamble);

        // The font table is written straight to the stream.  It is not kept
        // in the 'fonts' vector, which holds the font names found by read().
        writeFonts(dos);

        // Start from a fresh descriptor so that calling write() more than
        // once does not register every paragraph again.
        descriptor = new DocumentDescriptor();
        for (int i = 0; i < paragraphs.size(); i++) {
            Paragraph para = (Paragraph)paragraphs.elementAt(i);
            descriptor.addParagraph((short)para.getTextLength(), para.getLines());
        }
        dos.write(descriptor.getDescriptor());

        for (int i = 0; i < paragraphs.size(); i++ ) {
            Paragraph para = (Paragraph)paragraphs.elementAt(i);

            // Last paragraph has some extra data
            if (i + 1 == paragraphs.size()) {
                para.setLastParagraph(true);
            }
            dos.write(para.getParagraphData());
        }


        /*
         * Before we write out the trailer, we need to make sure that it will
         * lead to the file ending on a 4 byte boundary.  The trailer is 18
         * bytes long, so two padding bytes are needed when the data written
         * so far is already aligned.
         */
        if (dos.size() % 4 == 0) {
            dos.write((byte)0x00);
            dos.write((byte)0x00);
        }

        dos.write(trailer);

        dos.flush();
        dos.close();
    }


    /**
     * <p>This method adds a new paragraph element to the document.  No string
     *    data is added to the paragraph.</p>
     *
     * <p><b>N.B.</b> The newly added paragraph becomes the current paragraph and
     *    is used as the target for all subsequent calls to addParagraphData().</p>
     *
     * @param   style       Paragraph Style object describing the formatting for
     *                      the new paragraph.  Can be null.
     * @param   listElement true if this paragraph is to be bulleted;
     *                      false otherwise.
     */
    public void addParagraph(ParaStyle style, boolean listElement)  {
        /* For the moment, only support basic text entry in a single paragraph */
        Paragraph para = new Paragraph(style);

        paragraphs.add(para);

        pStyle = style;
        currentPara = para;

        if (listElement) {
            para.setBullets(true);
        }
    }


    /**
     * <p>This method adds text to the current paragraph.</p>
     *
     * <p>If no paragraphs exist within the document, it creates one.</p>
     *
     * @param   data        The string data for this segment.
     * @param   style       Text Style object describing the formatting of this
     *                      segment.  Can be null.
     */
    public void addParagraphData(String data, TextStyle style) {
        if (currentPara == null) {
            addParagraph(null, false);
        }
        currentPara.addTextSegment(data, style);
    }


    /*
     * Preamble is the portion before font specification which never
     * seems to change from one file, or one saved version, to the next.
     *
     * Bytes 18h and 19h seem to contain the number of fonts and should
     * be modified when all of the fonts have been specified.
     * These bytes are the first two on the fourth line below.
     */
    private void initPreamble() {
         preamble = new byte[] { 0x7B, 0x5C, 0x70, 0x77, 0x69, 0x15, 0x00, 0x00,
                                 0x01, 0x01, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00,
                                 0x02, 0x00, 0x2C, 0x00, 0x01, 0x00, 0x0A, 0x00,  // Bytes 3-4 Font??
                                 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // Bytes 1-2  # Fonts
                                 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x01, 0x00,
                                 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
                                 0x00, 0x00, 0x00, 0x00 };
    }


    /*
     * This method writes the minimum font data that is used by the converter.
     * Currently, all documents convert to 10 point Courier New.  Tahoma is
     * always mentioned in Pocket Word files, however, even if it is not used.
     *
     * Each entry is 80 bytes long: the UTF-16LE name zero-padded to 64 bytes,
     * followed by 16 bytes of font data.
     *
     * TODO:    Rewrite to allow for multiple fonts once font support issues
     *          have been resolved.
     */
    private void writeFonts(OutputStream out) throws IOException {
        out.write("Tahoma".getBytes("UTF-16LE"));
        out.write(new byte[52]);       // Rest of font name?
        out.write(new byte[] { 0x02, 0x00, 0x01, 0x00 } );
        out.write(new byte[] { 0x00, 0x00, 0x01, 0x00 } );
        out.write(new byte[] { 0x00, 0x00, 0x00, 0x00 } );
        out.write(new byte[] { 0x00, 0x00, 0x00, 0x00 } );

        out.write("Courier New".getBytes("UTF-16LE"));
        out.write(new byte[42]);
        out.write(new byte[] { 0x14, 0x00, 0x04, 0x00 } );
        out.write(new byte[] { 0x01, 0x00, 0x00, 0x00 } );
        out.write(new byte[] { 0x00, 0x00, 0x15, 0x00 } );

        // Next part indicates that this is the last font
        out.write(new byte[] { 0x05, 0x00, 0x01, 0x00 } );
    }
}