summaryrefslogtreecommitdiff
path: root/test/pdf-fullrewrite.cc
blob: 2b912f7e45ce883868ae5f2619210a3bec703241 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
//========================================================================
//
// pdf-fullrewrite.cc
//
// Copyright 2007 Julien Rebetez
// Copyright 2012 Fabio D'Urso
//
//========================================================================

#include "GlobalParams.h"
#include "Error.h"
#include "Object.h"
#include "PDFDoc.h"
#include "XRef.h"
#include "goo/GooString.h"
#include "utils/parseargs.h"

// Forward declarations: both helpers are defined after main().
static GBool compareDocuments(PDFDoc *origDoc, PDFDoc *newDoc);
static GBool compareObjects(Object *objA, Object *objB);

// Storage for the command-line options. Each variable is referenced by the
// argDesc table below, which main() hands to parseArgs().
//
// The two password buffers start with the byte '\001'; main() treats a buffer
// that still begins with that byte as "no password given on the command line".
static char ownerPassword[33] = "\001";
static char userPassword[33] = "\001";
static GBool forceIncremental = gFalse; // -i: save as incremental update instead of full rewrite
static GBool checkOutput = gFalse;      // -check: reopen the output and compare it with the input
static GBool printHelp = gFalse;        // -h / -help / --help / -?: print usage

// Option table passed to parseArgs() and printUsage(). Each entry lists the
// option name, its kind, the variable that receives the value, the buffer size
// (only given for string options here) and the help text.
// The final entry has a NULL name — presumably the end-of-table marker expected
// by parseArgs(); confirm against utils/parseargs.h.
static const ArgDesc argDesc[] = {
  {"-opw",    argString,   ownerPassword,    sizeof(ownerPassword),
   "owner password (for encrypted files)"},
  {"-upw",    argString,   userPassword,     sizeof(userPassword),
   "user password (for encrypted files)"},
  {"-i",      argFlag,     &forceIncremental,0,
   "incremental update mode"},
  {"-check",  argFlag,     &checkOutput,     0,
   "verify the generated document"},
  {"-h",      argFlag,     &printHelp,       0,
   "print usage information"},
  {"-help",   argFlag,     &printHelp,       0,
   "print usage information"},
  {"--help",  argFlag,     &printHelp,       0,
   "print usage information"},
  {"-?",      argFlag,     &printHelp,       0,
   "print usage information"},
  {NULL}
};

// Entry point.
//
// Loads INPUT-FILE, saves it to OUTPUT-FILE (as a full rewrite, or as an
// incremental update when -i is given) and, when -check is given, reopens the
// generated file and compares it against the input with compareDocuments().
//
// Returns 0 on success or when help was explicitly requested; returns 1 on a
// usage error, on a load/save failure, or when verification fails.
int main (int argc, char *argv[])
{
  PDFDoc *doc = NULL;
  PDFDoc *docOut = NULL;
  GooString *inputName = NULL;
  GooString *outputName = NULL;
  GooString *ownerPW = NULL;
  GooString *userPW = NULL;
  int res = 0;

  // parse args
  GBool ok = parseArgs(argDesc, &argc, argv);
  if (!ok || (argc < 3) || printHelp) {
    printUsage(argv[0], "INPUT-FILE OUTPUT-FILE", argDesc);
    // Asking for help is not an error; bad or missing arguments are.
    if (!printHelp) {
      res = 1;
    }
    goto done;
  }

  inputName = new GooString(argv[1]);
  outputName = new GooString(argv[2]);

  // A buffer still starting with '\001' means the option was not supplied.
  if (ownerPassword[0] != '\001') {
    ownerPW = new GooString(ownerPassword);
  }
  if (userPassword[0] != '\001') {
    userPW = new GooString(userPassword);
  }

  // load input document
  // NOTE(review): inputName is never deleted here, so PDFDoc is assumed to
  // take ownership of the file name it is constructed with — confirm against
  // PDFDoc.h.
  globalParams = new GlobalParams();
  doc = new PDFDoc(inputName, ownerPW, userPW);
  if (!doc->isOk()) {
    fprintf(stderr, "Error loading input document\n");
    res = 1;
    goto done;
  }

  // save it back (in rewrite or incremental update mode)
  if (doc->saveAs(outputName, forceIncremental ? writeForceIncremental : writeForceRewrite) != 0) {
    fprintf(stderr, "Error saving document\n");
    res = 1;
    goto done;
  }

  if (checkOutput) {
    // open the generated document to verify it
    docOut = new PDFDoc(outputName, ownerPW, userPW);
    // docOut is now responsible for the file name (same ownership assumption
    // as for inputName above); forget our pointer so the cleanup code below
    // does not release it a second time.
    outputName = NULL;
    if (!docOut->isOk()) {
      fprintf(stderr, "Error loading generated document\n");
      res = 1;
    } else if (!compareDocuments(doc, docOut)) {
      fprintf(stderr, "Verification failed\n");
      res = 1;
    }
  }

done:
  // Every pointer is either NULL or still owned by us at this point, so the
  // deletes are safe on all paths, including the early error exits.
  delete docOut;
  delete doc;
  delete globalParams;
  delete userPW;
  delete ownerPW;
  delete outputName; // non-NULL only if it was never handed over to docOut
  return res;
}

// Compares two dictionaries entry by entry.
//
// Returns gTrue when dictA and dictB have the same number of entries and, for
// every key of dictA, the value stored under that key in dictB compares equal
// according to compareObjects(). Returns gFalse otherwise.
//
// Values are obtained with getValNF()/lookupNF() and handed to
// compareObjects() as they are.
static GBool compareDictionaries(Dict *dictA, Dict *dictB)
{
  const int length = dictA->getLength();
  if (dictB->getLength() != length)
    return gFalse;

  /* Check that every key in dictA is contained in dictB.
   * Since keys are unique and we've already checked that dictA and dictB
   * contain the same number of entries, we don't need to check that every key
   * in dictB is also contained in dictA */
  for (int i = 0; i < length; ++i) {
    Object valA, valB;
    const char *key = dictA->getKey(i);
    dictA->getValNF(i, &valA);
    dictB->lookupNF(key, &valB);

    // Release both temporaries before acting on the result, so that a
    // mismatch does not leak them.
    const GBool equal = compareObjects(&valA, &valB);
    valA.free();
    valB.free();
    if (!equal)
      return gFalse;
  }

  return gTrue;
}

// Recursively compares two PDF objects.
//
// Returns gTrue when objA and objB are considered equivalent:
//  - booleans, unsigned integers, strings, names and nulls must have the same
//    type and the same value;
//  - integers and reals are interchangeable (objB only has to be numeric) and
//    match when their values differ by less than 0.01;
//  - arrays must have the same length and pairwise-equal elements;
//  - dictionaries are compared with compareDictionaries();
//  - streams must have equal dictionaries and produce the same sequence of
//    values from getChar() up to and including EOF;
//  - references must have the same object and generation numbers (the
//    referenced objects themselves are not compared here).
// Any other type of objA is reported on stderr and treated as a mismatch.
static GBool compareObjects(Object *objA, Object *objB)
{
  switch (objA->getType()) {
    case objBool:
    {
      if (objB->getType() != objBool) {
        return gFalse;
      } else {
        return (objA->getBool() == objB->getBool());
      }
    }
    case objInt:
    case objReal:
    {
      if (!objB->isNum()) {
        return gFalse;
      } else {
        // Fuzzy comparison
        const double diff = objA->getNum() - objB->getNum();
        return (-0.01 < diff) && (diff < 0.01);
      }
    }
    case objUint:
    {
      if (objB->getType() != objUint) {
        return gFalse;
      } else {
        return (objA->getUint() == objB->getUint());
      }
    }
    case objString:
    {
      if (objB->getType() != objString) {
        return gFalse;
      } else {
        GooString *strA = objA->getString();
        GooString *strB = objB->getString();
        return (strA->cmp(strB) == 0);
      }
    }
    case objName:
    {
      if (objB->getType() != objName) {
        return gFalse;
      } else {
        GooString nameA(objA->getName());
        GooString nameB(objB->getName());
        return (nameA.cmp(&nameB) == 0);
      }
    }
    case objNull:
    {
      if (objB->getType() != objNull) {
        return gFalse;
      } else {
        return gTrue;
      }
    }
    case objArray:
    {
      if (objB->getType() != objArray) {
        return gFalse;
      } else {
        Array *arrayA = objA->getArray();
        Array *arrayB = objB->getArray();
        const int length = arrayA->getLength();
        if (arrayB->getLength() != length) {
          return gFalse;
        } else {
          for (int i = 0; i < length; ++i) {
            Object elemA, elemB;
            arrayA->getNF(i, &elemA);
            arrayB->getNF(i, &elemB);

            // Release both temporaries before acting on the result, so that
            // a mismatch does not leak them.
            const GBool equal = compareObjects(&elemA, &elemB);
            elemA.free();
            elemB.free();
            if (!equal) {
              return gFalse;
            }
          }
          return gTrue;
        }
      }
    }
    case objDict:
    {
      if (objB->getType() != objDict) {
        return gFalse;
      } else {
        Dict *dictA = objA->getDict();
        Dict *dictB = objB->getDict();
        return compareDictionaries(dictA, dictB);
      }
    }
    case objStream:
    {
      if (objB->getType() != objStream) {
        return gFalse;
      } else {
        Stream *streamA = objA->getStream();
        Stream *streamB = objB->getStream();
        if (!compareDictionaries(streamA->getDict(), streamB->getDict())) {
          return gFalse;
        } else {
          int c;
          streamA->reset();
          streamB->reset();
          // Read both streams in lock-step. The EOF value itself is compared
          // too, so streams of different length are detected as different.
          do
          {
            c = streamA->getChar();
            if (c != streamB->getChar()) {
              return gFalse;
            }
          } while (c != EOF);
          return gTrue;
        }
      }
    }
    case objRef:
    {
      if (objB->getType() != objRef) {
        return gFalse;
      } else {
        Ref refA = objA->getRef();
        Ref refB = objB->getRef();
        return (refA.num == refB.num) && (refA.gen == refB.gen);
      }
    }
    default:
    {
      // The enum is cast explicitly so that the argument type matches %u.
      fprintf(stderr, "compareObjects failed: unexpected object type %u\n",
              static_cast<unsigned int>(objA->getType()));
      return gFalse;
    }
  }
}

// Checks that newDoc (the document produced by saveAs) is consistent with
// origDoc by comparing their XRef tables entry by entry.
//
// For each entry number present in both tables this verifies, in order:
//  - in full rewrite mode, that entries flagged DontRewrite in the original
//    became free entries with the generation number incremented by one;
//  - that generation numbers match (entry 0 must have generation 65535 in the
//    new document, whatever the original had);
//  - that the flags found by scanSpecialFlags() match;
//  - that both entries are free or both are in use;
//  - for in-use entries, that the fetched objects compare equal according to
//    compareObjects().
//
// Every discrepancy is reported on stderr; checking continues with the
// remaining entries so that all problems are listed in one run.
//
// Returns gTrue if no discrepancy was found, gFalse otherwise.
static GBool compareDocuments(PDFDoc *origDoc, PDFDoc *newDoc)
{
  GBool result = gTrue;
  XRef *origXRef = origDoc->getXRef();
  XRef *newXRef = newDoc->getXRef();

  // Make sure that special flags are set in both documents
  origXRef->scanSpecialFlags();
  newXRef->scanSpecialFlags();

  // Compare XRef tables' size
  const int origNumObjects = origXRef->getNumObjects();
  const int newNumObjects = newXRef->getNumObjects();
  if (forceIncremental && origXRef->isXRefStream()) {
    // In case of incremental update, expect a new entry to be appended to store the new XRef stream
    if (origNumObjects+1 != newNumObjects) {
      fprintf(stderr, "XRef table: Unexpected number of entries (%d+1 != %d)\n", origNumObjects, newNumObjects);
      result = gFalse;
    }
  } else {
    // In all other cases the number of entries must be the same
    if (origNumObjects != newNumObjects) {
      fprintf(stderr, "XRef table: Different number of entries (%d != %d)\n", origNumObjects, newNumObjects);
      result = gFalse;
    }
  }

  // Compare each XRef entry
  // Only the entries that exist in both tables can be compared; a size
  // mismatch has already been reported above.
  const int numObjects = (origNumObjects < newNumObjects) ? origNumObjects : newNumObjects;
  for (int i = 0; i < numObjects; ++i) {
    XRefEntryType origType = origXRef->getEntry(i)->type;
    XRefEntryType newType = newXRef->getEntry(i)->type;
    // For compressed entries the gen field is not read; 0 is used instead.
    const int origGenNum = (origType != xrefEntryCompressed) ? origXRef->getEntry(i)->gen : 0;
    const int newGenNum = (newType != xrefEntryCompressed) ? newXRef->getEntry(i)->gen : 0;

    // Check that DontRewrite entries are freed in full rewrite mode
    if (!forceIncremental && origXRef->getEntry(i)->getFlag(XRefEntry::DontRewrite)) {
      if (newType != xrefEntryFree || origGenNum+1 != newGenNum) {
        fprintf(stderr, "XRef entry %d: DontRewrite entry was not freed correctly\n", i);
        result = gFalse;
      }
      continue; // There's nothing left to check for this entry
    }

    // Compare generation numbers
    // Object num 0 should always have gen 65535 according to specs, but some
    // documents have it set to 0. We always write 65535 in output
    if (i != 0) {
      if (origGenNum != newGenNum) {
        fprintf(stderr, "XRef entry %d: generation numbers differ (%d != %d)\n", i, origGenNum, newGenNum);
        result = gFalse;
        continue;
      }
    } else {
      if (newGenNum != 65535) {
        fprintf(stderr, "XRef entry %d: generation number was expected to be 65535 (%d != 65535)\n", i, newGenNum);
        result = gFalse;
        continue;
      }
    }

    // Compare object flags. A failure shows that there's some error in XRef::scanSpecialFlags()
    if (origXRef->getEntry(i)->flags != newXRef->getEntry(i)->flags) {
      fprintf(stderr, "XRef entry %d: flags detected by scanSpecialFlags differ (%d != %d)\n", i, origXRef->getEntry(i)->flags, newXRef->getEntry(i)->flags);
      result = gFalse;
    }

    // Check that either both are free or both are in use
    if ((origType == xrefEntryFree) != (newType == xrefEntryFree)) {
      const char *origStatus = (origType == xrefEntryFree) ? "free" : "in use";
      const char *newStatus = (newType == xrefEntryFree) ? "free" : "in use";
      fprintf(stderr, "XRef entry %d: usage status differs (%s != %s)\n", i, origStatus, newStatus);
      result = gFalse;
      continue;
    }

    // Skip free entries
    if (origType == xrefEntryFree) {
      continue;
    }

    // Compare contents
    Object origObj, newObj;
    origXRef->fetch(i, origGenNum, &origObj);
    newXRef->fetch(i, newGenNum, &newObj);
    if (!compareObjects(&origObj, &newObj)) {
      fprintf(stderr, "XRef entry %d: contents differ\n", i);
      result = gFalse;
    }
    origObj.free();
    newObj.free();
  }

  return result;
}