summaryrefslogtreecommitdiff
path: root/xmerge/source/aportisdoc/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/DocumentDeserializerImpl.java
blob: a9ebc6ab0045f3b37cf6d948bb4ba01ffeb6ebb9 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * This file incorporates work covered by the following license notice:
 *
 *   Licensed to the Apache Software Foundation (ASF) under one or more
 *   contributor license agreements. See the NOTICE file distributed
 *   with this work for additional information regarding copyright
 *   ownership. The ASF licenses this file to you under the Apache
 *   License, Version 2.0 (the "License"); you may not use this file
 *   except in compliance with the License. You may obtain a copy of
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
 */

package org.openoffice.xmerge.converter.xml.sxw.aportisdoc;

import org.w3c.dom.NodeList;
import org.w3c.dom.Node;
import org.w3c.dom.Element;
import org.w3c.dom.Text;

import java.io.IOException;
import java.util.Enumeration;

import org.openoffice.xmerge.Document;
import org.openoffice.xmerge.ConvertData;
import org.openoffice.xmerge.ConvertException;
import org.openoffice.xmerge.DocumentDeserializer;
import org.openoffice.xmerge.converter.xml.OfficeConstants;
import org.openoffice.xmerge.converter.xml.sxw.SxwDocument;
import org.openoffice.xmerge.converter.palm.PalmDB;
import org.openoffice.xmerge.converter.palm.Record;
import org.openoffice.xmerge.converter.palm.PalmDocument;
import org.openoffice.xmerge.util.Debug;

/**
 *  <p>AportisDoc implementation of <code>DocumentDeserializer</code>
 *  for the {@link
 *  org.openoffice.xmerge.converter.xml.sxw.aportisdoc.PluginFactoryImpl
 *  PluginFactoryImpl}.</p>
 *
 *  <p>This converts a file in AportisDoc PDB format to StarOffice
 *  XML format.</p>
 *
 *  <p>The <code>deserialize</code> method uses a <code>DocDecoder</code>
 *  to read the AportisDoc format into a <code>String</code> object, then
 *  it calls <code>buildDocument</code> to create a <code>SxwDocument</code>
 *  object from it.</p>
 *
 *  @author      Herbie Ong
 */
public final class DocumentDeserializerImpl
    implements OfficeConstants, DocConstants, DocumentDeserializer {

    /**
     *  The <code>ConvertData</code> object supplying the input documents.
     *  It is assigned once, in the constructor, and never replaced.
     */
    private final ConvertData cd;


    /**
     *  Constructor that assigns the given <code>ConvertData</code>
     *  to this object as input.
     *
     *  @param  cd  A <code>ConvertData</code> object to read data for
     *              the conversion process by the <code>deserialize</code>
     *              method.
     */
    public DocumentDeserializerImpl(ConvertData cd) {
        this.cd = cd;
    }


    /**
     *  Convert the given <code>ConvertData</code> object
     *  into a <code>SxwDocument</code> object.
     *
     *  <p>Every <code>PalmDocument</code> in the enumeration is decoded
     *  and converted, but only the document built from the last one is
     *  returned.  If the enumeration is empty, <code>null</code> is
     *  returned.</p>
     *
     *  @return  Resulting <code>SxwDocument</code> object, or
     *           <code>null</code> if there was no input document.
     *
     *  @throws  ConvertException   If any conversion error occurs.
     *  @throws  IOException        If any I/O error occurs.
     */
    public Document deserialize() throws IOException, ConvertException {

        Document doc = null;
        Enumeration e = cd.getDocumentEnumeration();

        while (e.hasMoreElements()) {
            PalmDocument palmDoc = (PalmDocument) e.nextElement();
            PalmDB pdb = palmDoc.getPdb();

            log("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
            log("<AportisDoc>");

            Record[] recs = pdb.getRecords();
            String docName = palmDoc.getName();
            DocDecoder decoder = new DocDecoder();
            String text = decoder.parseRecords(recs);

            // Each iteration overwrites the previous result.
            doc = buildDocument(docName, text);

            log("</AportisDoc>");
        }

        return doc;
    }


    /**
     *  Parses the text content of an AportisDoc format and build a
     *  <code>SxwDocument</code>.
     *
     *  <p>The text is scanned character by character: a tab character
     *  becomes a tab-stop element, an end-of-line character closes the
     *  current paragraph and opens a new one, a space that is followed
     *  by further spaces produces a space element carrying the number
     *  of following spaces, and everything else is collected into text
     *  nodes.  Empty input yields a document with one empty
     *  paragraph.</p>
     *
     *  @param  docName  Name of <code>Document</code>.
     *  @param  str      Text content of AportisDoc format.
     *
     *  @return  Resulting <code>SxwDocument</code> object.
     *
     *  @throws  IOException  If any I/O error occurs.
     */
    private SxwDocument buildDocument(String docName, String str)
        throws IOException {

        // create minimum office xml document.
        SxwDocument sxwDoc = new SxwDocument(docName);
        sxwDoc.initContentDOM();

        org.w3c.dom.Document doc = sxwDoc.getContentDOM();

        // Grab hold of the office:body tag,
        // Assume there should be one.
        // This is where top level paragraphs will append to.
        NodeList list = doc.getElementsByTagName(TAG_OFFICE_BODY);
        Node bodyNode = list.item(0);

        // Store all the text in a character array.
        char[] text = str.toCharArray();

        // startIndex has 2 purposes:
        // if value is -1, it means that there are no text characters
        // needed to be processed for a Text node.  if value >= 0, it
        // is the index of the starting position of a text section
        // for a Text node.
        int startIndex = -1;

        // Create a paragraph node to start with.
        Element paraNode = doc.createElement(TAG_PARAGRAPH);

        log("<PARA>");

        for (int i = 0; i < text.length; i++) {

            switch (text[i]) {

                case TAB_CHAR:

                    // Flush any pending text before the tab.
                    if (startIndex >= 0) {
                        addTextNode(doc, paraNode, text, startIndex, i - 1);
                        startIndex = -1;
                    }

                    // Then, add tab element.
                    Element tabNode = doc.createElement(TAG_TAB_STOP);
                    paraNode.appendChild(tabNode);

                    log("<TAB/>");
                    break;

                case EOL_CHAR:

                    // Flush any pending text before closing the paragraph.
                    if (startIndex >= 0) {
                        addTextNode(doc, paraNode, text, startIndex, i - 1);
                        startIndex = -1;
                    }

                    // Then, add the current paragraph to body.
                    bodyNode.appendChild(paraNode);

                    // Create another paragraph element.
                    paraNode = doc.createElement(TAG_PARAGRAPH);

                    log("</PARA>");
                    log("<PARA>");
                    break;

                case SPACE_CHAR:

                    // count is the number of space chars that directly
                    // follow the one at i (the look-ahead stops at the
                    // end of the array).
                    int count = countFollowingSpaces(text, i);

                    // A space node is needed only if at least one more
                    // space follows the current one.

                    if (count > 0) {

                        // Flush pending text first; the flushed run
                        // includes the space at i itself.
                        // NOTE(review): when no text is pending, the
                        // space at i is not written anywhere and only
                        // the following spaces are represented.
                        // Confirm whether this is intended.
                        if (startIndex >= 0) {
                            addTextNode(doc, paraNode, text,
                                        startIndex, i);
                            startIndex = -1;
                        }

                        // Then, create a space element
                        // with the proper attribute.
                        Element spaceNode = doc.createElement(TAG_SPACE);
                        spaceNode.setAttribute(ATTRIBUTE_SPACE_COUNT,
                            Integer.toString(count));

                        paraNode.appendChild(spaceNode);

                        // reposition i to the last space character.
                        i += count;

                        log("<SPACE count=\"" + count + "\" />");

                    } else {

                        // A lone space is ordinary text: start a text
                        // run here if none is in progress.
                        if (startIndex < 0) {

                            startIndex = i;
                            log("<TEXT>");
                        }
                    }

                    break;

                default:

                    // If there are no chars for text node yet,
                    // this should be the start.
                    if (startIndex < 0) {

                        startIndex = i;
                        log("<TEXT>");
                    }

                    break;
            }
        }

        int lastIndex = text.length - 1;

        // Flush whatever text is still pending at the end of input.

        if (startIndex >= 0) {
            addTextNode(doc, paraNode, text, startIndex, lastIndex);
        }

        // Then, add the last paragraph element if it is not added yet.
        // An EOL as the final character has already appended its
        // paragraph; empty input (lastIndex < 0) has not appended any.
        if (lastIndex < 0 || text[lastIndex] != EOL_CHAR) {
            bodyNode.appendChild(paraNode);
        }

        log("</PARA>");

        return sxwDoc;
    }


    /**
     *  Counts the space characters that immediately follow the given
     *  position, stopping at the first non-space character or at the
     *  end of the array.
     *
     *  @param  text  Array of characters containing text.
     *  @param  pos   Index of the character after which to start
     *                counting.
     *
     *  @return  Number of consecutive space characters after
     *           <code>pos</code>; zero if there are none.
     */
    private static int countFollowingSpaces(char[] text, int pos) {

        int count = 0;
        int next = pos + 1;

        while (next < text.length && text[next] == SPACE_CHAR) {
            count++;
            next++;
        }

        return count;
    }


    /**
     *  Add a Text <code>Node</code> to the given paragraph node with the
     *  text starting at the given <code>startPos</code> until
     *  <code>endPos</code> (both inclusive).
     *
     *  @param  doc       <code>org.w3c.dom.Document</code> object for creating
     *                    <code>Node</code> objects.
     *  @param  para      The current paragraph <code>Node</code> to append
     *                    text <code>Node</code>.
     *  @param  text      Array of characters containing text.
     *  @param  startPos  Starting index position for text value.
     *  @param  endPos    End index position for text value.
     */
    private void addTextNode(org.w3c.dom.Document doc, Node para, char text[],
        int startPos, int endPos) {

        // Both positions are inclusive, hence the + 1 on the length.
        String str = new String(text, startPos, endPos - startPos + 1);
        Text textNode = doc.createTextNode(str);
        para.appendChild(textNode);
        log(str);
        log("</TEXT>");
    }

    /**
     *  Sends message to the log object.
     *
     *  @param  str  Debug message.
     */
    private void log(String str) {

        Debug.log(Debug.TRACE, str);
    }
}