summaryrefslogtreecommitdiff
path: root/utils
diff options
context:
space:
mode:
Diffstat (limited to 'utils')
-rw-r--r--utils/CMakeLists.txt4
-rw-r--r--utils/HtmlOutputDev.cc33
-rw-r--r--utils/ImageOutputDev.cc498
-rw-r--r--utils/ImageOutputDev.h45
-rw-r--r--utils/JSInfo.cc233
-rw-r--r--utils/JSInfo.h60
-rw-r--r--utils/Makefile.am4
-rw-r--r--utils/pdfimages.185
-rw-r--r--utils/pdfimages.cc45
-rw-r--r--utils/pdfinfo.16
-rw-r--r--utils/pdfinfo.cc27
-rw-r--r--utils/pdfseparate.cc47
-rw-r--r--utils/pdftocairo.cc4
-rw-r--r--utils/pdftohtml.cc6
-rw-r--r--utils/pdftoppm.112
-rw-r--r--utils/pdftoppm.cc17
-rw-r--r--utils/pdftotext.cc6
17 files changed, 932 insertions, 200 deletions
diff --git a/utils/CMakeLists.txt b/utils/CMakeLists.txt
index f82cfa4d..2f04b39d 100644
--- a/utils/CMakeLists.txt
+++ b/utils/CMakeLists.txt
@@ -69,6 +69,8 @@ set(pdfimages_SOURCES ${common_srcs}
pdfimages.cc
ImageOutputDev.cc
ImageOutputDev.h
+ JSInfo.cc
+ JSInfo.h
)
add_executable(pdfimages ${pdfimages_SOURCES})
target_link_libraries(pdfimages ${common_libs})
@@ -78,6 +80,8 @@ install(FILES pdfimages.1 DESTINATION share/man/man1)
# pdfinfo
set(pdfinfo_SOURCES ${common_srcs}
pdfinfo.cc printencodings.cc
+ JSInfo.cc
+ JSInfo.h
)
add_executable(pdfinfo ${pdfinfo_SOURCES})
target_link_libraries(pdfinfo ${common_libs})
diff --git a/utils/HtmlOutputDev.cc b/utils/HtmlOutputDev.cc
index 7926674e..a3ae239e 100644
--- a/utils/HtmlOutputDev.cc
+++ b/utils/HtmlOutputDev.cc
@@ -25,7 +25,7 @@
// Copyright (C) 2009 Warren Toomey <wkt@tuhs.org>
// Copyright (C) 2009, 2011 Carlos Garcia Campos <carlosgc@gnome.org>
// Copyright (C) 2009 Reece Dunn <msclrhd@gmail.com>
-// Copyright (C) 2010, 2012 Adrian Johnson <ajohnson@redneon.com>
+// Copyright (C) 2010, 2012, 2013 Adrian Johnson <ajohnson@redneon.com>
// Copyright (C) 2010 Hib Eris <hib@hiberis.nl>
// Copyright (C) 2010 OSSD CDAC Mumbai by Leena Chourey (leenac@cdacmumbai.in) and Onkar Potdar (onkar@cdacmumbai.in)
// Copyright (C) 2011 Joshua Richardson <jric@chegg.com>
@@ -1413,32 +1413,39 @@ void HtmlOutputDev::drawPngImage(GfxState *state, Stream *str, int width, int he
delete imgStr;
}
else { // isMask == true
- ImageStream *imgStr = new ImageStream(str, width, 1, 1);
- imgStr->reset();
+ int size = (width + 7)/8;
+
+ // PDF masks use 0 = draw current color, 1 = leave unchanged.
+ // We invert this to provide the standard interpretation of alpha
+ // (0 = transparent, 1 = opaque). If the colorMap already inverts
+ // the mask we leave the data unchanged.
+ int invert_bits = 0xff;
+ if (colorMap) {
+ GfxGray gray;
+ Guchar zero = 0;
+ colorMap->getGray(&zero, &gray);
+ if (colToByte(gray) == 0)
+ invert_bits = 0x00;
+ }
- Guchar *png_row = (Guchar *)gmalloc( width );
+ str->reset();
+ Guchar *png_row = (Guchar *)gmalloc(size);
for (int ri = 0; ri < height; ++ri)
{
- // read the row of the mask
- Guchar *bit_row = imgStr->getLine();
-
- // invert for PNG
- for(int i = 0; i < width; i++)
- png_row[i] = bit_row[i] ? 0xff : 0x00;
+ for(int i = 0; i < size; i++)
+ png_row[i] = str->getChar() ^ invert_bits;
if (!writer->writeRow( &png_row ))
{
error(errIO, -1, "Failed to write into PNG '%s'", fName->getCString());
delete writer;
fclose(f1);
- delete imgStr;
gfree(png_row);
return;
}
}
- imgStr->close();
- delete imgStr;
+ str->close();
gfree(png_row);
}
diff --git a/utils/ImageOutputDev.cc b/utils/ImageOutputDev.cc
index 8b18d2b4..93ed0fe6 100644
--- a/utils/ImageOutputDev.cc
+++ b/utils/ImageOutputDev.cc
@@ -20,7 +20,7 @@
// Copyright (C) 2009 Carlos Garcia Campos <carlosgc@gnome.org>
// Copyright (C) 2009 William Bader <williambader@hotmail.com>
// Copyright (C) 2010 Jakob Voss <jakob.voss@gbv.de>
-// Copyright (C) 2012 Adrian Johnson <ajohnson@redneon.com>
+// Copyright (C) 2012, 2013 Adrian Johnson <ajohnson@redneon.com>
// Copyright (C) 2013 Thomas Fischer <fischer@unix-ag.uni-kl.de>
//
// To see a description of the changes please see the Changelog file that
@@ -39,27 +39,37 @@
#include <stdlib.h>
#include <stddef.h>
#include <ctype.h>
+#include <math.h>
#include "goo/gmem.h"
+#include "goo/NetPBMWriter.h"
+#include "goo/PNGWriter.h"
+#include "goo/TiffWriter.h"
#include "Error.h"
#include "GfxState.h"
#include "Object.h"
#include "Stream.h"
+#include "JBIG2Stream.h"
#include "ImageOutputDev.h"
-ImageOutputDev::ImageOutputDev(char *fileRootA, GBool pageNamesA, GBool dumpJPEGA, GBool listImagesA) {
+ImageOutputDev::ImageOutputDev(char *fileRootA, GBool pageNamesA, GBool listImagesA) {
listImages = listImagesA;
if (!listImages) {
fileRoot = copyString(fileRootA);
fileName = (char *)gmalloc(strlen(fileRoot) + 45);
}
- dumpJPEG = dumpJPEGA;
+ outputPNG = gFalse;
+ outputTiff = gFalse;
+ dumpJPEG = gFalse;
+ dumpJP2 = gFalse;
+ dumpJBIG2 = gFalse;
+ dumpCCITT = gFalse;
pageNames = pageNamesA;
imgNum = 0;
pageNum = 0;
ok = gTrue;
if (listImages) {
- printf("page num type width height color comp bpc enc interp object ID\n");
- printf("---------------------------------------------------------------------\n");
+ printf("page num type width height color comp bpc enc interp object ID x-ppi y-ppi size ratio\n");
+ printf("--------------------------------------------------------------------------------------------\n");
}
}
@@ -79,6 +89,34 @@ void ImageOutputDev::setFilename(const char *fileExt) {
}
}
+
+// Print a floating point number between 0 and 9999 using 4 characters,
+// e.g. '1.23', '12.3', ' 123', '1234'.
+//
+// We need to be careful to handle the cases where rounding adds an
+// extra digit before the decimal point, e.g. printf("%4.2f", 9.99999)
+// outputs "10.00" instead of "9.99".
+static void printNumber(double d)
+{
+ char buf[10];
+
+ if (d < 10.0) {
+ sprintf(buf, "%4.2f", d);
+ buf[4] = 0;
+ printf("%s", buf);
+ } else if (d < 100.0) {
+ sprintf(buf, "%4.1f", d);
+ if (!isdigit(buf[3])) {
+ buf[3] = 0;
+ printf(" %s", buf);
+ } else {
+ printf("%s", buf);
+ }
+ } else {
+ printf("%4.0f", d);
+ }
+}
+
void ImageOutputDev::listImage(GfxState *state, Object *ref, Stream *str,
int width, int height,
GfxImageColorMap *colorMap,
@@ -179,192 +217,370 @@ void ImageOutputDev::listImage(GfxState *state, Object *ref, Stream *str,
printf("%-3s ", interpolate ? "yes" : "no");
if (inlineImg) {
- printf("[inline]\n");
+ printf("[inline] ");
} else if (ref->isRef()) {
const Ref imageRef = ref->getRef();
if (imageRef.gen >= 100000) {
- printf("[none]\n");
+ printf("[none] ");
} else {
- printf(" %6d %2d\n", imageRef.num, imageRef.gen);
+ printf(" %6d %2d ", imageRef.num, imageRef.gen);
}
} else {
- printf("[none]\n");
+ printf("[none] ");
}
+ double *mat = state->getCTM();
+ double width2 = mat[0] + mat[2];
+ double height2 = mat[1] + mat[3];
+ double xppi = fabs(width*72.0/width2) + 0.5;
+ double yppi = fabs(height*72.0/height2) + 0.5;
+ if (xppi < 1.0)
+ printf("%5.3f ", xppi);
+ else
+ printf("%5.0f ", xppi);
+ if (yppi < 1.0)
+ printf("%5.3f ", yppi);
+ else
+ printf("%5.0f ", yppi);
+
+ Goffset embedSize = -1;
+ if (!inlineImg)
+ embedSize = str->getBaseStream()->getLength();
+
+ long long imageSize = 0;
+ if (colorMap && colorMap->isOk())
+ imageSize = ((long long)width * height * colorMap->getNumPixelComps() * colorMap->getBits())/8;
+ else
+ imageSize = (long long)width*height/8; // mask
+
+ double ratio = -1.0;
+ if (imageSize > 0)
+ ratio = 100.0*embedSize/imageSize;
+
+ if (embedSize < 0) {
+ printf(" - ");
+ } else if (embedSize <= 9999) {
+ printf("%4lldB", embedSize);
+ } else {
+ double d = embedSize/1024.0;
+ if (d <= 9999.0) {
+ printNumber(d);
+ putchar('K');
+ } else {
+ d /= 1024.0;
+ if (d <= 9999.0) {
+ printNumber(d);
+ putchar('M');
+ } else {
+ d /= 1024.0;
+ printNumber(d);
+ putchar('G');
+ }
+ }
+ }
+
+ if (ratio > 9.9)
+ printf(" %3.0f%%\n", ratio);
+ else if (ratio >= 0.0)
+ printf(" %3.1f%%\n", ratio);
+ else
+ printf(" - \n");
+
++imgNum;
}
-void ImageOutputDev::writeMask(GfxState *state, Object *ref, Stream *str,
- int width, int height, GBool invert,
- GBool interpolate, GBool inlineImg) {
+void ImageOutputDev::writeRawImage(Stream *str, const char *ext) {
FILE *f;
int c;
- int size, i;
- // dump JPEG file
- if (dumpJPEG && str->getKind() == strDCT && !inlineImg) {
+ // open the image file
+ setFilename(ext);
+ ++imgNum;
+ if (!(f = fopen(fileName, "wb"))) {
+ error(errIO, -1, "Couldn't open image file '{0:s}'", fileName);
+ return;
+ }
- // open the image file
- setFilename("jpg");
- ++imgNum;
- if (!(f = fopen(fileName, "wb"))) {
- error(errIO, -1, "Couldn't open image file '{0:s}'", fileName);
- return;
- }
+ // initialize stream
+ str = str->getNextStream();
+ str->reset();
- // initialize stream
- str = str->getNextStream();
- str->reset();
+ // copy the stream
+ while ((c = str->getChar()) != EOF)
+ fputc(c, f);
- // copy the stream
- while ((c = str->getChar()) != EOF)
- fputc(c, f);
+ str->close();
+ fclose(f);
+}
- str->close();
- fclose(f);
+void ImageOutputDev::writeImageFile(ImgWriter *writer, ImageFormat format, const char *ext,
+ Stream *str, int width, int height, GfxImageColorMap *colorMap) {
+ FILE *f;
+ ImageStream *imgStr;
+ unsigned char *row;
+ unsigned char *rowp;
+ Guchar *p;
+ GfxRGB rgb;
+ GfxCMYK cmyk;
+ GfxGray gray;
+ Guchar zero = 0;
+ int invert_bits;
- // dump PBM file
- } else {
+ setFilename(ext);
+ ++imgNum;
+ if (!(f = fopen(fileName, "wb"))) {
+ error(errIO, -1, "Couldn't open image file '{0:s}'", fileName);
+ return;
+ }
- // open the image file and write the PBM header
- setFilename("pbm");
- ++imgNum;
- if (!(f = fopen(fileName, "wb"))) {
- error(errIO, -1, "Couldn't open image file '{0:s}'", fileName);
- return;
- }
- fprintf(f, "P4\n");
- fprintf(f, "%d %d\n", width, height);
+ if (!writer->init(f, width, height, 72, 72)) {
+ error(errIO, -1, "Error writing '{0:s}'", fileName);
+ return;
+ }
+ if (format != imgMonochrome) {
+ // initialize stream
+ imgStr = new ImageStream(str, width, colorMap->getNumPixelComps(),
+ colorMap->getBits());
+ imgStr->reset();
+ } else {
// initialize stream
str->reset();
+ }
+
+ row = (unsigned char *) gmallocn(width, sizeof(unsigned int));
- // copy the stream
- size = height * ((width + 7) / 8);
- for (i = 0; i < size; ++i) {
- fputc(str->getChar(), f);
+ // PDF masks use 0 = draw current color, 1 = leave unchanged.
+ // We invert this to provide the standard interpretation of alpha
+ // (0 = transparent, 1 = opaque). If the colorMap already inverts
+ // the mask we leave the data unchanged.
+ invert_bits = 0xff;
+ if (colorMap) {
+ colorMap->getGray(&zero, &gray);
+ if (colToByte(gray) == 0)
+ invert_bits = 0x00;
+ }
+
+ // for each line...
+ for (int y = 0; y < height; y++) {
+ switch (format) {
+ case imgRGB:
+ p = imgStr->getLine();
+ rowp = row;
+ for (int x = 0; x < width; ++x) {
+ if (p) {
+ colorMap->getRGB(p, &rgb);
+ *rowp++ = colToByte(rgb.r);
+ *rowp++ = colToByte(rgb.g);
+ *rowp++ = colToByte(rgb.b);
+ p += colorMap->getNumPixelComps();
+ } else {
+ *rowp++ = 0;
+ *rowp++ = 0;
+ *rowp++ = 0;
+ }
+ }
+ writer->writeRow(&row);
+ break;
+
+ case imgCMYK:
+ p = imgStr->getLine();
+ rowp = row;
+ for (int x = 0; x < width; ++x) {
+ if (p) {
+ colorMap->getCMYK(p, &cmyk);
+ *rowp++ = colToByte(cmyk.c);
+ *rowp++ = colToByte(cmyk.m);
+ *rowp++ = colToByte(cmyk.y);
+ *rowp++ = colToByte(cmyk.k);
+ p += colorMap->getNumPixelComps();
+ } else {
+ *rowp++ = 0;
+ *rowp++ = 0;
+ *rowp++ = 0;
+ *rowp++ = 0;
+ }
+ }
+ writer->writeRow(&row);
+ break;
+
+ case imgGray:
+ p = imgStr->getLine();
+ rowp = row;
+ for (int x = 0; x < width; ++x) {
+ if (p) {
+ colorMap->getGray(p, &gray);
+ *rowp++ = colToByte(gray);
+ p += colorMap->getNumPixelComps();
+ } else {
+ *rowp++ = 0;
+ }
+ }
+ writer->writeRow(&row);
+ break;
+
+ case imgMonochrome:
+ int size = (width + 7)/8;
+ for (int x = 0; x < size; x++)
+ row[x] = str->getChar() ^ invert_bits;
+ writer->writeRow(&row);
+ break;
}
+ }
- str->close();
- fclose(f);
+ gfree(row);
+ if (format != imgMonochrome) {
+ imgStr->close();
+ delete imgStr;
}
+ str->close();
+ writer->close();
+ fclose(f);
}
void ImageOutputDev::writeImage(GfxState *state, Object *ref, Stream *str,
int width, int height,
- GfxImageColorMap *colorMap,
- GBool interpolate, int *maskColors, GBool inlineImg) {
- FILE *f;
- ImageStream *imgStr;
- Guchar *p;
- Guchar zero = 0;
- GfxGray gray;
- GfxRGB rgb;
- int x, y;
- int c;
- int size, i;
- int pbm_mask = 0xff;
+ GfxImageColorMap *colorMap, GBool inlineImg) {
+ ImageFormat format;
- // dump JPEG file
if (dumpJPEG && str->getKind() == strDCT &&
(colorMap->getNumPixelComps() == 1 ||
colorMap->getNumPixelComps() == 3) &&
!inlineImg) {
- // open the image file
- setFilename("jpg");
- ++imgNum;
+ // dump JPEG file
+ writeRawImage(str, "jpg");
+
+ } else if (dumpJP2 && str->getKind() == strJPX && !inlineImg) {
+ // dump JPEG2000 file
+ writeRawImage(str, "jp2");
+
+ } else if (dumpJBIG2 && str->getKind() == strJBIG2 && !inlineImg) {
+ // dump JBIG2 globals stream if available
+ JBIG2Stream *jb2Str = static_cast<JBIG2Stream *>(str);
+ Object *globals = jb2Str->getGlobalsStream();
+ if (globals->isStream()) {
+ FILE *f;
+ int c;
+ Stream *str = globals->getStream();
+
+ setFilename("jb2g");
+ if (!(f = fopen(fileName, "wb"))) {
+ error(errIO, -1, "Couldn't open image file '{0:s}'", fileName);
+ return;
+ }
+ str->reset();
+ while ((c = str->getChar()) != EOF)
+ fputc(c, f);
+ str->close();
+ fclose(f);
+ }
+
+ // dump JBIG2 embedded file
+ writeRawImage(str, "jb2e");
+
+ } else if (dumpCCITT && str->getKind() == strCCITTFax && !inlineImg) {
+ // write CCITT parameters
+ CCITTFaxStream *ccittStr = static_cast<CCITTFaxStream *>(str);
+ FILE *f;
+ setFilename("params");
if (!(f = fopen(fileName, "wb"))) {
error(errIO, -1, "Couldn't open image file '{0:s}'", fileName);
return;
}
+ if (ccittStr->getEncoding() < 0)
+ fprintf(f, "-4 ");
+ else if (ccittStr->getEncoding() == 0)
+ fprintf(f, "-1 ");
+ else
+ fprintf(f, "-2 ");
- // initialize stream
- str = str->getNextStream();
- str->reset();
+ if (ccittStr->getEndOfLine())
+ fprintf(f, "-A ");
+ else
+ fprintf(f, "-P ");
+
+ fprintf(f, "-X %d ", ccittStr->getColumns());
- // copy the stream
- while ((c = str->getChar()) != EOF)
- fputc(c, f);
+ if (ccittStr->getBlackIs1())
+ fprintf(f, "-W ");
+ else
+ fprintf(f, "-B ");
+
+ fprintf(f, "-M\n"); // PDF uses MSB first
- str->close();
fclose(f);
- // dump PBM file
- } else if (colorMap->getNumPixelComps() == 1 &&
- colorMap->getBits() == 1) {
+ // dump CCITT file
+ writeRawImage(str, "ccitt");
- // open the image file and write the PBM header
- setFilename("pbm");
- ++imgNum;
- if (!(f = fopen(fileName, "wb"))) {
- error(errIO, -1, "Couldn't open image file '{0:s}'", fileName);
- return;
- }
- fprintf(f, "P4\n");
- fprintf(f, "%d %d\n", width, height);
+ } else if (outputPNG && !(outputTiff && colorMap &&
+ (colorMap->getColorSpace()->getMode() == csDeviceCMYK ||
+ (colorMap->getColorSpace()->getMode() == csICCBased &&
+ colorMap->getNumPixelComps() == 4)))) {
- // initialize stream
- str->reset();
+ // output in PNG format
- // if 0 comes out as 0 in the color map, the we _flip_ stream bits
- // otherwise we pass through stream bits unmolested
- colorMap->getGray(&zero, &gray);
- if(colToByte(gray))
- pbm_mask = 0;
+#if ENABLE_LIBPNG
+ ImgWriter *writer;
- // copy the stream
- size = height * ((width + 7) / 8);
- for (i = 0; i < size; ++i) {
- fputc(str->getChar() ^ pbm_mask, f);
+ if (!colorMap || (colorMap->getNumPixelComps() == 1 && colorMap->getBits() == 1)) {
+ writer = new PNGWriter(PNGWriter::MONOCHROME);
+ format = imgMonochrome;
+ } else if (colorMap->getColorSpace()->getMode() == csDeviceGray ||
+ colorMap->getColorSpace()->getMode() == csCalGray) {
+ writer = new PNGWriter(PNGWriter::GRAY);
+ format = imgGray;
+ } else {
+ writer = new PNGWriter(PNGWriter::RGB);
+ format = imgRGB;
}
- str->close();
- fclose(f);
-
- // dump PPM file
- } else {
+ writeImageFile(writer, format, "png", str, width, height, colorMap);
+#endif
- // open the image file and write the PPM header
- setFilename("ppm");
- ++imgNum;
- if (!(f = fopen(fileName, "wb"))) {
- error(errIO, -1, "Couldn't open image file '{0:s}'", fileName);
- return;
+ } else if (outputTiff) {
+ // output in TIFF format
+
+#if ENABLE_LIBTIFF
+ ImgWriter *writer;
+
+ if (!colorMap || (colorMap->getNumPixelComps() == 1 && colorMap->getBits() == 1)) {
+ writer = new TiffWriter(TiffWriter::MONOCHROME);
+ format = imgMonochrome;
+ } else if (colorMap->getColorSpace()->getMode() == csDeviceGray ||
+ colorMap->getColorSpace()->getMode() == csCalGray) {
+ writer = new TiffWriter(TiffWriter::GRAY);
+ format = imgGray;
+ } else if (colorMap->getColorSpace()->getMode() == csDeviceCMYK ||
+ (colorMap->getColorSpace()->getMode() == csICCBased && colorMap->getNumPixelComps() == 4)) {
+ writer = new TiffWriter(TiffWriter::CMYK);
+ format = imgCMYK;
+ } else {
+ writer = new TiffWriter(TiffWriter::RGB);
+ format = imgRGB;
}
- fprintf(f, "P6\n");
- fprintf(f, "%d %d\n", width, height);
- fprintf(f, "255\n");
- // initialize stream
- imgStr = new ImageStream(str, width, colorMap->getNumPixelComps(),
- colorMap->getBits());
- imgStr->reset();
+ writeImageFile(writer, format, "tif", str, width, height, colorMap);
+#endif
- // for each line...
- for (y = 0; y < height; ++y) {
-
- // write the line
- if ((p = imgStr->getLine())) {
- for (x = 0; x < width; ++x) {
- colorMap->getRGB(p, &rgb);
- fputc(colToByte(rgb.r), f);
- fputc(colToByte(rgb.g), f);
- fputc(colToByte(rgb.b), f);
- p += colorMap->getNumPixelComps();
- }
- } else {
- for (x = 0; x < width; ++x) {
- fputc(0, f);
- fputc(0, f);
- fputc(0, f);
- }
- }
+ } else {
+ // output in PPM/PBM format
+ ImgWriter *writer;
+
+ if (!colorMap || (colorMap->getNumPixelComps() == 1 && colorMap->getBits() == 1)) {
+ writer = new NetPBMWriter(NetPBMWriter::MONOCHROME);
+ format = imgMonochrome;
+ } else {
+ writer = new NetPBMWriter(NetPBMWriter::RGB);
+ format = imgRGB;
}
- imgStr->close();
- delete imgStr;
- fclose(f);
+ writeImageFile(writer, format,
+ format == imgRGB ? "ppm" : "pbm",
+ str, width, height, colorMap);
+
+ delete writer;
}
}
@@ -381,9 +597,9 @@ void ImageOutputDev::drawImageMask(GfxState *state, Object *ref, Stream *str,
int width, int height, GBool invert,
GBool interpolate, GBool inlineImg) {
if (listImages)
- listImage(state, ref, str, width, height, NULL, interpolate, inlineImg, imgMask);
+ listImage(state, ref, str, width, height, NULL, interpolate, inlineImg, imgStencil);
else
- writeMask(state, ref, str, width, height, invert, interpolate, inlineImg);
+ writeImage(state, ref, str, width, height, NULL, inlineImg);
}
void ImageOutputDev::drawImage(GfxState *state, Object *ref, Stream *str,
@@ -393,7 +609,7 @@ void ImageOutputDev::drawImage(GfxState *state, Object *ref, Stream *str,
if (listImages)
listImage(state, ref, str, width, height, colorMap, interpolate, inlineImg, imgImage);
else
- writeImage(state, ref, str, width, height, colorMap, interpolate, maskColors, inlineImg);
+ writeImage(state, ref, str, width, height, colorMap, inlineImg);
}
void ImageOutputDev::drawMaskedImage(
@@ -404,9 +620,8 @@ void ImageOutputDev::drawMaskedImage(
listImage(state, ref, str, width, height, colorMap, interpolate, gFalse, imgImage);
listImage(state, ref, str, maskWidth, maskHeight, NULL, maskInterpolate, gFalse, imgMask);
} else {
- drawImage(state, ref, str, width, height, colorMap, interpolate, NULL, gFalse);
- drawImageMask(state, ref, maskStr, maskWidth, maskHeight, maskInvert,
- maskInterpolate, gFalse);
+ writeImage(state, ref, str, width, height, colorMap, gFalse);
+ writeImage(state, ref, maskStr, maskWidth, maskHeight, NULL, gFalse);
}
}
@@ -419,8 +634,7 @@ void ImageOutputDev::drawSoftMaskedImage(
listImage(state, ref, str, width, height, colorMap, interpolate, gFalse, imgImage);
listImage(state, ref, maskStr, maskWidth, maskHeight, maskColorMap, maskInterpolate, gFalse, imgSmask);
} else {
- drawImage(state, ref, str, width, height, colorMap, interpolate, NULL, gFalse);
- drawImage(state, ref, maskStr, maskWidth, maskHeight,
- maskColorMap, maskInterpolate, NULL, gFalse);
+ writeImage(state, ref, str, width, height, colorMap, gFalse);
+ writeImage(state, ref, maskStr, maskWidth, maskHeight, maskColorMap, gFalse);
}
}
diff --git a/utils/ImageOutputDev.h b/utils/ImageOutputDev.h
index 13911ed3..14918ea8 100644
--- a/utils/ImageOutputDev.h
+++ b/utils/ImageOutputDev.h
@@ -36,6 +36,7 @@
#include <stdio.h>
#include "goo/gtypes.h"
+#include "goo/ImgWriter.h"
#include "OutputDev.h"
class GfxState;
@@ -52,17 +53,42 @@ public:
imgMask,
imgSmask
};
+ enum ImageFormat {
+ imgRGB,
+ imgGray,
+ imgMonochrome,
+ imgCMYK
+ };
// Create an OutputDev which will write images to files named
// <fileRoot>-NNN.<type> or <fileRoot>-PPP-NNN.<type>, if
// <pageNames> is set. Normally, all images are written as PBM
- // (.pbm) or PPM (.ppm) files. If <dumpJPEG> is set, JPEG images
+ // (.pbm) or PPM (.ppm) files unless PNG or TIFF output is enabled (if both
+ // are enabled, CMYK images use TIFF and all others PNG). If JPEG is enabled, JPEG images
// are written as JPEG (.jpg) files.
- ImageOutputDev(char *fileRootA, GBool pageNamesA, GBool dumpJPEGA, GBool listImagesA);
+ ImageOutputDev(char *fileRootA, GBool pageNamesA, GBool listImagesA);
// Destructor.
virtual ~ImageOutputDev();
+ // Use PNG format for output
+ void enablePNG(GBool png) { outputPNG = png; }
+
+ // Use TIFF format for output
+ void enableTiff(GBool tiff) { outputTiff = tiff; }
+
+ // Use Jpeg format for Jpeg files
+ void enableJpeg(GBool jpeg) { dumpJPEG = jpeg; }
+
+ // Use Jpeg2000 format for Jpeg2000 files
+ void enableJpeg2000(GBool jp2) { dumpJP2 = jp2; }
+
+ // Use JBIG2 format for JBIG2 files
+ void enableJBig2(GBool jbig2) { dumpJBIG2 = jbig2; }
+
+ // Use CCITT format for CCITT files
+ void enableCCITT(GBool ccitt) { dumpCCITT = ccitt; }
+
// Check if file was successfully created.
virtual GBool isOk() { return ok; }
@@ -128,18 +154,21 @@ private:
GfxImageColorMap *colorMap,
GBool interpolate, GBool inlineImg,
ImageType imageType);
- void writeMask(GfxState *state, Object *ref, Stream *str,
- int width, int height, GBool invert,
- GBool interpolate, GBool inlineImg);
void writeImage(GfxState *state, Object *ref, Stream *str,
- int width, int height, GfxImageColorMap *colorMap,
- GBool interpolate, int *maskColors, GBool inlineImg);
-
+ int width, int height, GfxImageColorMap *colorMap, GBool inlineImg);
+ void writeRawImage(Stream *str, const char *ext);
+ void writeImageFile(ImgWriter *writer, ImageFormat format, const char *ext,
+ Stream *str, int width, int height, GfxImageColorMap *colorMap);
char *fileRoot; // root of output file names
char *fileName; // buffer for output file names
GBool listImages; // list images instead of dumping
GBool dumpJPEG; // set to dump native JPEG files
+ GBool dumpJP2; // set to dump native JPEG2000 files
+ GBool dumpJBIG2; // set to dump native JBIG2 files
+ GBool dumpCCITT; // set to dump native CCITT files
+ GBool outputPNG; // set to output in PNG format
+ GBool outputTiff; // set to output in TIFF format
GBool pageNames; // set to include page number in file names
int pageNum; // current page number
int imgNum; // current image number
diff --git a/utils/JSInfo.cc b/utils/JSInfo.cc
new file mode 100644
index 00000000..e3205c4c
--- /dev/null
+++ b/utils/JSInfo.cc
@@ -0,0 +1,233 @@
+//========================================================================
+//
+// JSInfo.cc
+//
+// This file is licensed under the GPLv2 or later
+//
+// Copyright (C) 2013 Adrian Johnson <ajohnson@redneon.com>
+//
+// To see a description of the changes please see the Changelog file that
+// came with your tarball or type make ChangeLog if you are building from git
+//
+//========================================================================
+
+
+#include "config.h"
+#include <stdio.h>
+#include "Object.h"
+#include "Dict.h"
+#include "Annot.h"
+#include "PDFDoc.h"
+#include "JSInfo.h"
+#include "Link.h"
+#include "Form.h"
+#include "UnicodeMap.h"
+#include "UTF.h"
+
+JSInfo::JSInfo(PDFDoc *docA, int firstPage) {
+ doc = docA;
+ currentPage = firstPage + 1;
+}
+
+JSInfo::~JSInfo() {
+}
+
+void JSInfo::printJS(GooString *js) {
+ Unicode *u;
+ char buf[8];
+ int i, n, len;
+
+ if (!js || !js->getCString())
+ return;
+
+ len = TextStringToUCS4(js, &u);
+ for (i = 0; i < len; i++) {
+ n = uniMap->mapUnicode(u[i], buf, sizeof(buf));
+ fwrite(buf, 1, n, file);
+ }
+}
+
+void JSInfo::scanLinkAction(LinkAction *link, const char *action) {
+ if (!link)
+ return;
+
+ if (link->getKind() == actionJavaScript) {
+ hasJS = gTrue;
+ if (print) {
+ LinkJavaScript *linkjs = static_cast<LinkJavaScript *>(link);
+ GooString *s = linkjs->getScript();
+ if (s && s->getCString()) {
+ fprintf(file, "%s:\n", action);
+ printJS(s);
+ fputs("\n\n", file);
+ }
+ }
+ }
+
+ if (link->getKind() == actionRendition) {
+ LinkRendition *linkr = static_cast<LinkRendition *>(link);
+ if (linkr->getScript()) {
+ hasJS = gTrue;
+ if (print) {
+ GooString *s = linkr->getScript();
+ if (s && s->getCString()) {
+ fprintf(file, "%s (Rendition):\n", action);
+ printJS(s);
+ fputs("\n\n", file);
+ }
+ }
+ }
+ }
+}
+
+void JSInfo::scanJS(int nPages) {
+ print = gFalse;
+ file = NULL;
+ scan(nPages);
+}
+
+void JSInfo::scanJS(int nPages, FILE *fout, UnicodeMap *uMap) {
+ print = gTrue;
+ file = fout;
+ uniMap = uMap;
+ scan(nPages);
+}
+
+void JSInfo::scan(int nPages) {
+ Page *page;
+ Annots *annots;
+ Object obj1, obj2;
+ int lastPage;
+
+ hasJS = gFalse;
+
+ // Names
+ int numNames = doc->getCatalog()->numJS();
+ if (numNames > 0) {
+ hasJS = gTrue;
+ if (print) {
+ for (int i = 0; i < numNames; i++) {
+ fprintf(file, "Name Dictionary \"%s\":\n", doc->getCatalog()->getJSName(i)->getCString());
+ printJS(doc->getCatalog()->getJS(i));
+ fputs("\n\n", file);
+ }
+ }
+ }
+
+ // document actions
+ scanLinkAction(doc->getCatalog()->getAdditionalAction(Catalog::actionCloseDocument),
+ "Before Close Document");
+ scanLinkAction(doc->getCatalog()->getAdditionalAction(Catalog::actionSaveDocumentStart),
+ "Before Save Document");
+ scanLinkAction(doc->getCatalog()->getAdditionalAction(Catalog::actionSaveDocumentFinish),
+ "After Save Document");
+ scanLinkAction(doc->getCatalog()->getAdditionalAction(Catalog::actionPrintDocumentStart),
+ "Before Print Document");
+ scanLinkAction(doc->getCatalog()->getAdditionalAction(Catalog::actionPrintDocumentFinish),
+ "After Print Document");
+
+ // form field actions
+ if (doc->getCatalog()->getFormType() == Catalog::AcroForm) {
+ Form *form = doc->getCatalog()->getForm();
+ for (int i = 0; i < form->getNumFields(); i++) {
+ FormField *field = form->getRootField(i);
+ for (int j = 0; j < field->getNumWidgets(); j++) {
+ FormWidget *widget = field->getWidget(j);
+ scanLinkAction(widget->getActivationAction(),
+ "Field Activated");
+ scanLinkAction(widget->getAdditionalAction(Annot::actionFieldModified),
+ "Field Modified");
+ scanLinkAction(widget->getAdditionalAction(Annot::actionFormatField),
+ "Format Field");
+ scanLinkAction(widget->getAdditionalAction(Annot::actionValidateField),
+ "Validate Field");
+ scanLinkAction(widget->getAdditionalAction(Annot::actionCalculateField),
+ "Calculate Field");
+ }
+ }
+ }
+
+ // scan pages
+
+ if (currentPage > doc->getNumPages()) {
+ return;
+ }
+
+ lastPage = currentPage + nPages;
+ if (lastPage > doc->getNumPages() + 1) {
+ lastPage = doc->getNumPages() + 1;
+ }
+
+ for (int pg = currentPage; pg < lastPage; ++pg) {
+ page = doc->getPage(pg);
+ if (!page) continue;
+
+ // page actions (open, close)
+ scanLinkAction(page->getAdditionalAction(Page::actionOpenPage), "Page Open");
+ scanLinkAction(page->getAdditionalAction(Page::actionClosePage), "Page Close");
+
+ // annotation actions (links, screen, widget)
+ annots = page->getAnnots();
+ for (int i = 0; i < annots->getNumAnnots(); ++i) {
+ if (annots->getAnnot(i)->getType() == Annot::typeLink) {
+ AnnotLink *annot = static_cast<AnnotLink *>(annots->getAnnot(i));
+ scanLinkAction(annot->getAction(), "Link Annotation Activated");
+ } else if (annots->getAnnot(i)->getType() == Annot::typeScreen) {
+ AnnotScreen *annot = static_cast<AnnotScreen *>(annots->getAnnot(i));
+ scanLinkAction(annot->getAction(),
+ "Screen Annotation Activated");
+ scanLinkAction(annot->getAdditionalAction(Annot::actionCursorEntering),
+ "Screen Annotation Cursor Enter");
+ scanLinkAction(annot->getAdditionalAction(Annot::actionCursorLeaving),
+ "Screen Annotation Cursor Leave");
+ scanLinkAction(annot->getAdditionalAction(Annot::actionMousePressed),
+ "Screen Annotation Mouse Pressed");
+ scanLinkAction(annot->getAdditionalAction(Annot::actionMouseReleased),
+ "Screen Annotation Mouse Released");
+ scanLinkAction(annot->getAdditionalAction(Annot::actionFocusIn),
+ "Screen Annotation Focus In");
+ scanLinkAction(annot->getAdditionalAction(Annot::actionFocusOut),
+ "Screen Annotation Focus Out");
+ scanLinkAction(annot->getAdditionalAction(Annot::actionPageOpening),
+ "Screen Annotation Page Open");
+ scanLinkAction(annot->getAdditionalAction(Annot::actionPageClosing),
+ "Screen Annotation Page Close");
+ scanLinkAction(annot->getAdditionalAction(Annot::actionPageVisible),
+ "Screen Annotation Page Visible");
+ scanLinkAction(annot->getAdditionalAction(Annot::actionPageInvisible),
+ "Screen Annotation Page Invisible");
+
+ } else if (annots->getAnnot(i)->getType() == Annot::typeWidget) {
+ AnnotWidget *annot = static_cast<AnnotWidget *>(annots->getAnnot(i));
+ scanLinkAction(annot->getAction(),
+ "Widget Annotation Activated");
+ scanLinkAction(annot->getAdditionalAction(Annot::actionCursorEntering),
+ "Widget Annotation Cursor Enter");
+ scanLinkAction(annot->getAdditionalAction(Annot::actionCursorLeaving),
+ "Widget Annotation Cursor Leave");
+ scanLinkAction(annot->getAdditionalAction(Annot::actionMousePressed),
+ "Widget Annotation Mouse Pressed");
+ scanLinkAction(annot->getAdditionalAction(Annot::actionMouseReleased),
+ "Widget Annotation Mouse Released");
+ scanLinkAction(annot->getAdditionalAction(Annot::actionFocusIn),
+ "Widget Annotation Focus In");
+ scanLinkAction(annot->getAdditionalAction(Annot::actionFocusOut),
+ "Widget Annotation Focus Out");
+ scanLinkAction(annot->getAdditionalAction(Annot::actionPageOpening),
+ "Widget Annotation Page Open");
+ scanLinkAction(annot->getAdditionalAction(Annot::actionPageClosing),
+ "Widget Annotation Page Close");
+ scanLinkAction(annot->getAdditionalAction(Annot::actionPageVisible),
+ "Widget Annotation Page Visible");
+ scanLinkAction(annot->getAdditionalAction(Annot::actionPageInvisible),
+ "Widget Annotation Page Invisible");
+ }
+ }
+ }
+
+ currentPage = lastPage;
+}
+
+GBool JSInfo::containsJS() {
+ return hasJS;
+};
diff --git a/utils/JSInfo.h b/utils/JSInfo.h
new file mode 100644
index 00000000..19b786ff
--- /dev/null
+++ b/utils/JSInfo.h
@@ -0,0 +1,60 @@
+//========================================================================
+//
+// JSInfo.h
+//
+// This file is licensed under the GPLv2 or later
+//
+// Copyright (C) 2013 Adrian Johnson <ajohnson@redneon.com>
+//
+// To see a description of the changes please see the Changelog file that
+// came with your tarball or type make ChangeLog if you are building from git
+//
+//========================================================================
+
+#ifndef JS_INFO_H
+#define JS_INFO_H
+
+#include <stdio.h>
+#include "Object.h"
+#include "PDFDoc.h"
+#include "goo/gtypes.h"
+
+#include "Link.h"
+#include "UnicodeMap.h"
+
+class PDFDoc;
+
+class JSInfo {
+public:
+
+ // Constructor.
+ JSInfo(PDFDoc *doc, int firstPage = 0);
+
+ // Destructor.
+ ~JSInfo();
+
+ // scan for JS in the PDF
+ void scanJS(int nPages);
+
+ // scan and print JS in the PDF
+ void scanJS(int nPages, FILE *fout, UnicodeMap *uMap);
+
+ // return true if PDF contains JavaScript
+ GBool containsJS();
+
+private:
+
+ PDFDoc *doc;
+ int currentPage;
+ GBool hasJS;
+ GBool print;
+ FILE *file;
+ UnicodeMap *uniMap;
+
+ void scan(int nPages);
+ void scanLinkAction(LinkAction *link, const char *action);
+ void printJS(GooString *js);
+
+};
+
+#endif
diff --git a/utils/Makefile.am b/utils/Makefile.am
index 0c954413..1dd9a128 100644
--- a/utils/Makefile.am
+++ b/utils/Makefile.am
@@ -88,12 +88,16 @@ pdfimages_SOURCES = \
pdfimages.cc \
ImageOutputDev.cc \
ImageOutputDev.h \
+ JSInfo.cc \
+ JSInfo.h \
$(common)
pdfinfo_SOURCES = \
pdfinfo.cc \
printencodings.cc \
printencodings.h \
+ JSInfo.cc \
+ JSInfo.h \
$(common)
pdftops_SOURCES = \
diff --git a/utils/pdfimages.1 b/utils/pdfimages.1
index 2929eca1..8485f3d3 100644
--- a/utils/pdfimages.1
+++ b/utils/pdfimages.1
@@ -10,17 +10,26 @@ pdfimages \- Portable Document Format (PDF) image extractor
.SH DESCRIPTION
.B Pdfimages
saves images from a Portable Document Format (PDF) file as Portable
-Pixmap (PPM), Portable Bitmap (PBM), or JPEG files.
+Pixmap (PPM), Portable Bitmap (PBM), Portable Network Graphics (PNG),
+Tagged Image File Format (TIFF), JPEG, JPEG2000, or JBIG2 files.
.PP
Pdfimages reads the PDF file
.IR PDF-file ,
-scans one or more pages, and writes one PPM, PBM, or JPEG file for each image,
+scans one or more pages, and writes one file for each image,
.IR image-root - nnn . xxx ,
where
.I nnn
is the image number and
.I xxx
-is the image type (.ppm, .pbm, .jpg).
+is the image type (.ppm, .pbm, .png, .tif, .jpg, .jp2, .jb2e, or .jb2g).
+.PP
+The default output format is PBM (for monochrome images) or PPM for
+non-monochrome. The \-png or \-tiff options change the default output
+to PNG or TIFF respectively. If both \-png and \-tiff are specified,
+CMYK images will be written as TIFF and all other images will be
+written as PNG. In addition the \-j, \-jp2, and \-jbig2 options will
+cause JPEG, JPEG2000, and JBIG2, respectively, images in the PDF file
+to be written in their native format.
.SH OPTIONS
.TP
.BI \-f " number"
@@ -29,11 +38,61 @@ Specifies the first page to scan.
.BI \-l " number"
Specifies the last page to scan.
.TP
+.B \-png
+Change the default output format to PNG.
+.TP
+.B \-tiff
+Change the default output format to TIFF.
+.TP
.B \-j
-Normally, all images are written as PBM (for monochrome images) or PPM
-(for non-monochrome images) files. With this option, images in DCT
-format are saved as JPEG files. All non-DCT images are saved in
-PBM/PPM format as usual.
+Write images in JPEG format as JPEG files instead of the default format. The JPEG file is identical to the JPEG data stored in the PDF.
+.TP
+.B \-jp2
+Write images in JPEG2000 format as JP2 files instead of the default format. The JP2 file is identical to the JPEG2000 data stored in the PDF.
+.TP
+.B \-jbig2
+Write images in JBIG2 format as JBIG2 files instead of the default format. JBIG2 data in PDF is of the embedded type. The embedded type of JBIG2 has an optional separate file containing global data. The embedded data is written with the extension .jb2e and the global data (if available) will be written to the same image number with the extension .jb2g. The content of both these files is identical to the JBIG2 data in the PDF.
+.TP
+.B \-ccitt
+Write images in CCITT format as CCITT files instead of the default
+format. The CCITT file is identical to the CCITT data stored in the
+PDF. PDF files contain additional parameters specifying
+how to decode the CCITT data. These parameters are translated to
+fax2tiff input options and written to a .params file with the same image
+number. The parameters are:
+.RS
+.TP
+.B \-1
+1D Group 3 encoding
+.TP
+.B \-2
+2D Group 3 encoding
+.TP
+.B \-4
+Group 4 encoding
+.TP
+.B \-A
+Beginning of line is aligned on a byte boundary
+.TP
+.B \-P
+Beginning of line is not aligned on a byte boundary
+.TP
+.B \-X n
+The image width in pixels
+.TP
+.B \-W
+Encoding uses 1 for black and 0 for white
+.TP
+.B \-B
+Encoding uses 0 for black and 1 for white
+.TP
+.B \-M
+Input data fills from most significant bit to least significant bit.
+.RE
+.TP
+.B \-all
+Write JPEG, JPEG2000, JBIG2, and CCITT images in their native format. CMYK files are written as TIFF files. All other images are written as PNG files.
+This is equivalent to specifying the options \-png \-tiff \-j \-jp2 \-jbig2 \-ccitt.
.TP
.B \-list
Instead of writing the images, list the images along with various information for each image. Do not specify an
@@ -134,6 +193,18 @@ ccitt - CCITT Group 3 or Group 4 Fax
.TP
.B object ID
the image dictionary object ID (number and generation)
+.TP
+.B x\-ppi
+The horizontal resolution of the image (in pixels per inch) when rendered on the pdf page.
+.TP
+.B y\-ppi
+The vertical resolution of the image (in pixels per inch) when rendered on the pdf page.
+.TP
+.B size
+The size of the embedded image in the pdf file. The following suffixes are used: 'B' bytes, 'K' kilobytes, 'M' megabytes, and 'G' gigabytes.
+.TP
+.B ratio
+The compression ratio of the embedded image.
.RE
.TP
.BI \-opw " password"
diff --git a/utils/pdfimages.cc b/utils/pdfimages.cc
index 82c301c7..96709ed7 100644
--- a/utils/pdfimages.cc
+++ b/utils/pdfimages.cc
@@ -18,7 +18,7 @@
// Copyright (C) 2007-2008, 2010 Albert Astals Cid <aacid@kde.org>
// Copyright (C) 2010 Hib Eris <hib@hiberis.nl>
// Copyright (C) 2010 Jakob Voss <jakob.voss@gbv.de>
-// Copyright (C) 2012 Adrian Johnson <ajohnson@redneon.com>
+// Copyright (C) 2012, 2013 Adrian Johnson <ajohnson@redneon.com>
//
// To see a description of the changes please see the Changelog file that
// came with your tarball or type make ChangeLog if you are building from git
@@ -50,7 +50,13 @@
static int firstPage = 1;
static int lastPage = 0;
static GBool listImages = gFalse;
+static GBool enablePNG = gFalse;
+static GBool enableTiff = gFalse;
static GBool dumpJPEG = gFalse;
+static GBool dumpJP2 = gFalse;
+static GBool dumpJBIG2 = gFalse;
+static GBool dumpCCITT = gFalse;
+static GBool allFormats = gFalse;
static GBool pageNames = gFalse;
static char ownerPassword[33] = "\001";
static char userPassword[33] = "\001";
@@ -63,8 +69,24 @@ static const ArgDesc argDesc[] = {
"first page to convert"},
{"-l", argInt, &lastPage, 0,
"last page to convert"},
+#if ENABLE_LIBPNG
+ {"-png", argFlag, &enablePNG, 0,
+ "change the default output format to PNG"},
+#endif
+#if ENABLE_LIBTIFF
+ {"-tiff", argFlag, &enableTiff, 0,
+ "change the default output format to TIFF"},
+#endif
{"-j", argFlag, &dumpJPEG, 0,
"write JPEG images as JPEG files"},
+ {"-jp2", argFlag, &dumpJP2, 0,
+ "write JPEG2000 images as JP2 files"},
+ {"-jbig2", argFlag, &dumpJBIG2, 0,
+ "write JBIG2 images as JBIG2 files"},
+ {"-ccitt", argFlag, &dumpCCITT, 0,
+ "write CCITT images as CCITT files"},
+ {"-all", argFlag, &allFormats, 0,
+ "equivalent to -png -tiff -j -jp2 -jbig2 -ccitt"},
{"-list", argFlag, &listImages, 0,
"print list of images instead of saving"},
{"-opw", argString, ownerPassword, sizeof(ownerPassword),
@@ -168,10 +190,25 @@ int main(int argc, char *argv[]) {
lastPage = doc->getNumPages();
// write image files
- imgOut = new ImageOutputDev(imgRoot, pageNames, dumpJPEG, listImages);
+ imgOut = new ImageOutputDev(imgRoot, pageNames, listImages);
if (imgOut->isOk()) {
- doc->displayPages(imgOut, firstPage, lastPage, 72, 72, 0,
- gTrue, gFalse, gFalse);
+ if (allFormats) {
+ imgOut->enablePNG(gTrue);
+ imgOut->enableTiff(gTrue);
+ imgOut->enableJpeg(gTrue);
+ imgOut->enableJpeg2000(gTrue);
+ imgOut->enableJBig2(gTrue);
+ imgOut->enableCCITT(gTrue);
+ } else {
+ imgOut->enablePNG(enablePNG);
+ imgOut->enableTiff(enableTiff);
+ imgOut->enableJpeg(dumpJPEG);
+ imgOut->enableJpeg2000(dumpJP2);
+ imgOut->enableJBig2(dumpJBIG2);
+ imgOut->enableCCITT(dumpCCITT);
+ }
+ doc->displayPages(imgOut, firstPage, lastPage, 72, 72, 0,
+ gTrue, gFalse, gFalse);
}
delete imgOut;
diff --git a/utils/pdfinfo.1 b/utils/pdfinfo.1
index a3ad1c36..1dd7466e 100644
--- a/utils/pdfinfo.1
+++ b/utils/pdfinfo.1
@@ -48,6 +48,9 @@ tagged (yes/no)
form (AcroForm / XFA / none)
.RE
.RS
+javascript (yes/no)
+.RE
+.RS
page count
.RE
.RS
@@ -90,6 +93,9 @@ TrimBox, and ArtBox.
Prints document-level metadata. (This is the "Metadata" stream from
the PDF file's Catalog object.)
.TP
+.B \-js
+Prints all JavaScript in the PDF.
+.TP
.B \-rawdates
Prints the raw (undecoded) date strings, directly from the PDF file.
.TP
diff --git a/utils/pdfinfo.cc b/utils/pdfinfo.cc
index 14e4f6c6..22d9edd9 100644
--- a/utils/pdfinfo.cc
+++ b/utils/pdfinfo.cc
@@ -19,6 +19,7 @@
// Copyright (C) 2011 Vittal Aithal <vittal.aithal@cognidox.com>
// Copyright (C) 2012, 2013 Adrian Johnson <ajohnson@redneon.com>
// Copyright (C) 2012 Fabio D'Urso <fabiodurso@hotmail.it>
+// Copyright (C) 2013 Adrian Perez de Castro <aperez@igalia.com>
//
// To see a description of the changes please see the Changelog file that
// came with your tarball or type make ChangeLog if you are building from git
@@ -53,6 +54,7 @@
#include "UTF.h"
#include "Error.h"
#include "DateInfo.h"
+#include "JSInfo.h"
static void printInfoString(Dict *infoDict, const char *key, const char *text,
UnicodeMap *uMap);
@@ -63,6 +65,7 @@ static int firstPage = 1;
static int lastPage = 0;
static GBool printBoxes = gFalse;
static GBool printMetadata = gFalse;
+static GBool printJS = gFalse;
static GBool rawDates = gFalse;
static char textEncName[128] = "";
static char ownerPassword[33] = "\001";
@@ -80,6 +83,8 @@ static const ArgDesc argDesc[] = {
"print the page bounding boxes"},
{"-meta", argFlag, &printMetadata, 0,
"print the document metadata (XML)"},
+ {"-js", argFlag, &printJS, 0,
+ "print all JavaScript in the PDF"},
{"-rawdates", argFlag, &rawDates, 0,
"print the undecoded date strings directly from the PDF file"},
{"-enc", argString, textEncName, sizeof(textEncName),
@@ -225,8 +230,12 @@ int main(int argc, char *argv[]) {
info.free();
// print tagging info
- printf("Tagged: %s\n",
- doc->getStructTreeRoot()->isDict() ? "yes" : "no");
+ printf("Tagged: %s\n",
+ (doc->getCatalog()->getMarkInfo() & Catalog::markInfoMarked) ? "yes" : "no");
+ printf("UserProperties: %s\n",
+ (doc->getCatalog()->getMarkInfo() & Catalog::markInfoUserProperties) ? "yes" : "no");
+ printf("Suspects: %s\n",
+ (doc->getCatalog()->getMarkInfo() & Catalog::markInfoSuspects) ? "yes" : "no");
// print form info
switch (doc->getCatalog()->getFormType())
@@ -242,6 +251,13 @@ int main(int argc, char *argv[]) {
break;
}
+ // print javascript info
+ {
+ JSInfo jsInfo(doc, firstPage - 1);
+ jsInfo.scanJS(lastPage - firstPage + 1);
+ printf("JavaScript: %s\n", jsInfo.containsJS() ? "yes" : "no");
+ }
+
// print page count
printf("Pages: %d\n", doc->getNumPages());
@@ -371,6 +387,13 @@ int main(int argc, char *argv[]) {
delete metadata;
}
+ // print javascript
+ if (printJS) {
+ JSInfo jsInfo(doc, firstPage - 1);
+ fputs("\n", stdout);
+ jsInfo.scanJS(lastPage - firstPage + 1, stdout, uMap);
+ }
+
exitCode = 0;
// clean up
diff --git a/utils/pdfseparate.cc b/utils/pdfseparate.cc
index d7efcf08..dcb59f6b 100644
--- a/utils/pdfseparate.cc
+++ b/utils/pdfseparate.cc
@@ -7,6 +7,7 @@
// Copyright (C) 2011, 2012 Thomas Freitag <Thomas.Freitag@alfa.de>
// Copyright (C) 2012, 2013 Albert Astals Cid <aacid@kde.org>
// Copyright (C) 2013 Pino Toscano <pino@kde.org>
+// Copyright (C) 2013 Daniel Kahn Gillmor <dkg@fifthhorseman.net>
//
//========================================================================
#include "config.h"
@@ -55,6 +56,12 @@ bool extractPages (const char *srcFileName, const char *destFileName) {
return false;
}
+ // destFileName can have multiple %% and one %d
+ // We use auxDestFileName to replace all the valid % appearances
+ // by 'A' (random char that is not %), if at the end of replacing
+ // any of the valid appearances there is still any % around, the
+ // pattern is wrong
+ char *auxDestFileName = strdup(destFileName);
if (firstPage == 0 && lastPage == 0) {
firstPage = 1;
lastPage = doc->getNumPages();
@@ -63,29 +70,37 @@ bool extractPages (const char *srcFileName, const char *destFileName) {
lastPage = doc->getNumPages();
if (firstPage == 0)
firstPage = 1;
- if (firstPage != lastPage && strstr(destFileName, "%d") == NULL) {
- error(errSyntaxError, -1, "'{0:s}' must contain '%d' if more than one page should be extracted", destFileName);
+ bool foundmatch = false;
+ char *p = strstr(auxDestFileName, "%d");
+ if (p != NULL) {
+ foundmatch = true;
+ *p = 'A';
+ } else {
+ char pattern[5];
+ for (int i = 2; i < 10; i++) {
+ sprintf(pattern, "%%0%dd", i);
+ p = strstr(auxDestFileName, pattern);
+ if (p != NULL) {
+ foundmatch = true;
+ *p = 'A';
+ break;
+ }
+ }
+ }
+ if (!foundmatch && firstPage != lastPage) {
+ error(errSyntaxError, -1, "'{0:s}' must contain '%%d' if more than one page should be extracted", destFileName);
+ free(auxDestFileName);
return false;
}
-
- // destFileName can have multiple %% and one %d
- // We use auxDestFileName to replace all the valid % appearances
- // by 'A' (random char that is not %), if at the end of replacing
- // any of the valid appearances there is still any % around, the
- // pattern is wrong
- char *auxDestFileName = strdup(destFileName);
- // %% can appear as many times as you want
- char *p = strstr(auxDestFileName, "%%");
+
+ // at this point auxDestFileName can only contain %%
+ p = strstr(auxDestFileName, "%%");
while (p != NULL) {
*p = 'A';
*(p + 1) = 'A';
p = strstr(p, "%%");
}
- // %d can appear only one time
- p = strstr(auxDestFileName, "%d");
- if (p != NULL) {
- *p = 'A';
- }
+
// at this point any other % is wrong
p = strstr(auxDestFileName, "%");
if (p != NULL) {
diff --git a/utils/pdftocairo.cc b/utils/pdftocairo.cc
index e1fb2584..f6ddaaeb 100644
--- a/utils/pdftocairo.cc
+++ b/utils/pdftocairo.cc
@@ -19,7 +19,7 @@
// Copyright (C) 2009 Shen Liang <shenzhuxi@gmail.com>
// Copyright (C) 2009 Stefan Thomas <thomas@eload24.com>
// Copyright (C) 2009, 2010 Albert Astals Cid <aacid@kde.org>
-// Copyright (C) 2010, 2011, 2012 Adrian Johnson <ajohnson@redneon.com>
+// Copyright (C) 2010, 2011-2013 Adrian Johnson <ajohnson@redneon.com>
// Copyright (C) 2010 Hib Eris <hib@hiberis.nl>
// Copyright (C) 2010 Jonathan Liu <net147@gmail.com>
// Copyright (C) 2010 William Bader <williambader@hotmail.com>
@@ -371,7 +371,7 @@ void writePageImage(GooString *filename)
int b = (*pixel & 0x000000ff) >> 0;
// an arbitrary integer approximation of .3*r + .59*g + .11*b
int y = (r*19661+g*38666+b*7209 + 32829)>>16;
- if (tiff && mono) {
+ if (mono) {
if (bit == 7)
*rowp = 0;
if (y > 127)
diff --git a/utils/pdftohtml.cc b/utils/pdftohtml.cc
index bcec8cb7..af7251cb 100644
--- a/utils/pdftohtml.cc
+++ b/utils/pdftohtml.cc
@@ -16,7 +16,7 @@
// Copyright (C) 2007-2008, 2010, 2012 Albert Astals Cid <aacid@kde.org>
// Copyright (C) 2010 Hib Eris <hib@hiberis.nl>
// Copyright (C) 2010 Mike Slegeir <tehpola@yahoo.com>
-// Copyright (C) 2010 Suzuki Toshiya <mpsuzuki@hiroshima-u.ac.jp>
+// Copyright (C) 2010, 2013 Suzuki Toshiya <mpsuzuki@hiroshima-u.ac.jp>
// Copyright (C) 2010 OSSD CDAC Mumbai by Leena Chourey (leenac@cdacmumbai.in) and Onkar Potdar (onkar@cdacmumbai.in)
// Copyright (C) 2011 Steven Murdoch <Steven.Murdoch@cl.cam.ac.uk>
// Copyright (C) 2012 Igor Slepchin <igor.redhat@gmail.com>
@@ -104,8 +104,12 @@ static const ArgDesc argDesc[] = {
"don't print any messages or errors"},
{"-h", argFlag, &printHelp, 0,
"print usage information"},
+ {"-?", argFlag, &printHelp, 0,
+ "print usage information"},
{"-help", argFlag, &printHelp, 0,
"print usage information"},
+ {"--help", argFlag, &printHelp, 0,
+ "print usage information"},
{"-p", argFlag, &printHtml, 0,
"exchange .pdf links by .html"},
{"-c", argFlag, &complexMode, 0,
diff --git a/utils/pdftoppm.1 b/utils/pdftoppm.1
index 2321d6d0..7f88b6d0 100644
--- a/utils/pdftoppm.1
+++ b/utils/pdftoppm.1
@@ -102,6 +102,18 @@ Specifies the TIFF compression type. This defaults to "none".
Enable or disable FreeType (a TrueType / Type 1 font rasterizer).
This defaults to "yes".
.TP
+.BI \-thinlinemode " none | solid | shape"
+Specifies the thin line mode. This defaults to "none".
+.TP
+"solid":
+adjust lines with a width less than one pixel to pixel boundary
+and paint it with a width of one pixel.
+.TP
+"shape":
+adjust lines with a width less than one pixel to pixel boundary
+and paint it with a width of one pixel but with a shape in proportion
+to its width.
+.TP
.BI \-aa " yes | no"
Enable or disable font anti-aliasing. This defaults to "yes".
.TP
diff --git a/utils/pdftoppm.cc b/utils/pdftoppm.cc
index 73f337c2..a3d4d10b 100644
--- a/utils/pdftoppm.cc
+++ b/utils/pdftoppm.cc
@@ -94,6 +94,8 @@ static char vectorAntialiasStr[16] = "";
static char ownerPassword[33] = "";
static char userPassword[33] = "";
static char TiffCompressionStr[16] = "";
+static char thinLineModeStr[8] = "";
+static SplashThinLineMode thinLineMode = splashThinLineDefault;
#ifdef UTILS_USE_PTHREADS
static int numberOfJobs = 1;
#endif // UTILS_USE_PTHREADS
@@ -169,6 +171,8 @@ static const ArgDesc argDesc[] = {
{"-freetype", argString, enableFreeTypeStr, sizeof(enableFreeTypeStr),
"enable FreeType font rasterizer: yes, no"},
#endif
+ {"-thinlinemode", argString, thinLineModeStr, sizeof(thinLineModeStr),
+ "set thin line mode: none, solid, shape. Default: none"},
{"-aa", argString, antialiasStr, sizeof(antialiasStr),
"enable font anti-aliasing: yes, no"},
@@ -283,7 +287,7 @@ static void processPageJobs() {
#if SPLASH_CMYK
(jpegcmyk || overprint) ? splashModeDeviceN8 :
#endif
- splashModeRGB8, 4, gFalse, *pageJob.paperColor);
+ splashModeRGB8, 4, gFalse, *pageJob.paperColor, gTrue, gTrue, thinLineMode);
splashOut->startDoc(pageJob.doc);
savePageSlice(pageJob.doc, splashOut, pageJob.pg, x, y, w, h, pageJob.pg_w, pageJob.pg_h, pageJob.ppmFile);
@@ -368,6 +372,15 @@ int main(int argc, char *argv[]) {
fprintf(stderr, "Bad '-aaVector' value on command line\n");
}
}
+ if (thinLineModeStr[0]) {
+ if (strcmp(thinLineModeStr, "solid") == 0) {
+ thinLineMode = splashThinLineSolid;
+ } else if (strcmp(thinLineModeStr, "shape") == 0) {
+ thinLineMode = splashThinLineShape;
+ } else if (strcmp(thinLineModeStr, "none") != 0) {
+ fprintf(stderr, "Bad '-thinlinemode' value on command line\n");
+ }
+ }
if (quiet) {
globalParams->setErrQuiet(quiet);
}
@@ -444,7 +457,7 @@ int main(int argc, char *argv[]) {
(jpegcmyk || overprint) ? splashModeDeviceN8 :
#endif
splashModeRGB8, 4,
- gFalse, paperColor);
+ gFalse, paperColor, gTrue, gTrue, thinLineMode);
splashOut->startDoc(doc);
#endif // UTILS_USE_PTHREADS
diff --git a/utils/pdftotext.cc b/utils/pdftotext.cc
index 3b8b69f2..f7b2b0e5 100644
--- a/utils/pdftotext.cc
+++ b/utils/pdftotext.cc
@@ -385,9 +385,9 @@ int main(int argc, char *argv[]) {
}
} else {
- delete textOut;
- exitCode = 2;
- goto err3;
+ delete textOut;
+ exitCode = 2;
+ goto err3;
}
}
delete textOut;