2005-12-12 Kristian Høgsberg <krh@redhat.com>

* Makefile.am: * configure.ac: * goo/GooVector.h: * utils/HtmlFonts.cc: * utils/HtmlFonts.h: * utils/HtmlLinks.cc: * utils/HtmlLinks.h: * utils/HtmlOutputDev.cc: * utils/HtmlOutputDev.h: * utils/ImageOutputDev.cc: * utils/ImageOutputDev.h: * utils/Makefile.am: * utils/parseargs.c: * utils/parseargs.h: * utils/pdffonts.1: * utils/pdffonts.cc: * utils/pdfimages.1: * utils/pdfimages.cc: * utils/pdfinfo.1: * utils/pdfinfo.cc: * utils/pdftohtml.1: * utils/pdftohtml.cc: * utils/pdftoppm.1: * utils/pdftoppm.cc: * utils/pdftops.1: * utils/pdftops.cc: * utils/pdftotext.1: * utils/pdftotext.cc: Add command line utilities from xpdf.
author: Kristian Høgsberg <krh@redhat.com> 2005-12-12 20:15:11 +0000
committer: Kristian Høgsberg <krh@redhat.com> 2005-12-12 20:15:11 +0000
commit: bcc5e3afe27c8787ce7022a0701997c96eddb4fe (patch)
tree: 31c1727f926945dd49ef3d8dd56b9f6f0ef07618
parent: 5fbded32741acb5fac411189f80cb57aa11df517 (diff)
29 files changed, 6194 insertions, 9 deletions
diff --git a/ChangeLog b/ChangeLog
index 97f611a6..a556ecdc 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,34 @@
+2005-12-12  Kristian Høgsberg  <krh@redhat.com>
+
+	* Makefile.am:
+	* configure.ac:
+	* goo/GooVector.h:
+	* utils/HtmlFonts.cc:
+	* utils/HtmlFonts.h:
+	* utils/HtmlLinks.cc:
+	* utils/HtmlLinks.h:
+	* utils/HtmlOutputDev.cc:
+	* utils/HtmlOutputDev.h:
+	* utils/ImageOutputDev.cc:
+	* utils/ImageOutputDev.h:
+	* utils/Makefile.am:
+	* utils/parseargs.c:
+	* utils/parseargs.h:
+	* utils/pdffonts.1:
+	* utils/pdffonts.cc:
+	* utils/pdfimages.1:
+	* utils/pdfimages.cc:
+	* utils/pdfinfo.1:
+	* utils/pdfinfo.cc:
+	* utils/pdftohtml.1:
+	* utils/pdftohtml.cc:
+	* utils/pdftoppm.1:
+	* utils/pdftoppm.cc:
+	* utils/pdftops.1:
+	* utils/pdftops.cc:
+	* utils/pdftotext.1:
+	* utils/pdftotext.cc: Add command line utilities from xpdf.
+
 2005-12-10  Albert Astals Cid  <aacid@kde.org>
 
 	* qt4/src/poppler-page.cc:
diff --git a/Makefile.am b/Makefile.am
index a8d53613..9bcf97e8 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -22,7 +22,11 @@ qt4_subdir = qt4
 qt4_pc_file = poppler-qt4.pc
 endif
 
-SUBDIRS = goo fofi $(splash_subdir) poppler $(glib_subdir) $(qt_subdir) test $(qt4_subdir)
+if BUILD_UTILS
+utils_subdir = utils
+endif
+
+SUBDIRS = goo fofi $(splash_subdir) poppler $(utils_subdir) $(glib_subdir) $(qt_subdir) test $(qt4_subdir)
 
 EXTRA_DIST =					\
 	README-XPDF				\
diff --git a/configure.ac b/configure.ac
index e7f6ee36..b7b5961b 100644
--- a/configure.ac
+++ b/configure.ac
@@ -479,6 +479,13 @@ fi
 AM_CONDITIONAL(BUILD_GTK_TEST, test x$enable_gtk_test = xyes)
 
 
+AC_ARG_ENABLE(utils,
+              AC_HELP_STRING([--disable-utils],
+	                     [Don't compile poppler command line utils.]),
+              enable_utils=$enableval,
+              enable_utils="yes")
+AM_CONDITIONAL(BUILD_UTILS, test x$enable_utils = xyes)
+
 AC_ARG_ENABLE(compile-warnings,
               AC_HELP_STRING([--enable-compile-warnings=@<:@no/yes/kde@:>@]
                              [Turn on compiler warnings.]),,
@@ -505,6 +512,7 @@ goo/Makefile
 fofi/Makefile
 splash/Makefile
 poppler/Makefile
+utils/Makefile
 glib/Makefile
 test/Makefile
 qt/Makefile
@@ -521,11 +529,12 @@ poppler-qt4.pc])
 
 echo ""
 echo "Building poppler with support for:"
-echo "  splash output: $enable_splash_output"
-echo "  cairo output:  $enable_cairo_output"
-echo "  qt wrapper:    $enable_poppler_qt"
-echo "  qt4 wrapper:   $enable_poppler_qt4"
-echo "  qt4 unittests: $enable_poppler_qt4testlib"
-echo "  glib wrapper:  $enable_poppler_glib"
-echo "  use libjpeg:   $enable_libjpeg"
-echo "  use zlib:      $enable_zlib"
+echo "  splash output:      $enable_splash_output"
+echo "  cairo output:       $enable_cairo_output"
+echo "  qt wrapper:         $enable_poppler_qt"
+echo "  qt4 wrapper:        $enable_poppler_qt4"
+echo "  qt4 unittests:      $enable_poppler_qt4testlib"
+echo "  glib wrapper:       $enable_poppler_glib"
+echo "  use libjpeg:        $enable_libjpeg"
+echo "  use zlib:           $enable_zlib"
+echo "  command line utils: $enable_utils"
diff --git a/goo/GooVector.h b/goo/GooVector.h
new file mode 100644
index 00000000..3cd551b9
--- /dev/null
+++ b/goo/GooVector.h
@@ -0,0 +1,101 @@
+#ifndef _VECTOR_H
+#define _VECTOR_H
+#include "goo/gtypes.h"
+
+
+template<class T>
+class GooVector{
+private:
+   int _size;     // allocated capacity, in elements
+   T*  last;      // one past the last stored element
+   T*  storage;   // heap array of _size elements, 0 until first growth
+
+   // Double the capacity (0 grows to 2) and carry the stored elements over.
+   void resize(){
+     if (_size==0) _size=2;else _size=2*_size;
+     T *tmp=new T[_size];
+     if (storage){
+       last=copy(storage,last,tmp);
+       delete [] storage;
+     }
+     else last=tmp;
+     storage=tmp;
+   }
+
+   // Assign [src1,src2) onto dest element by element; returns the end of
+   // the written range.
+   T* copy(T* src1,T* src2,T* dest){
+     for (;src1!=src2;++src1,++dest) *dest=*src1;
+     return dest;
+   }
+
+   // Replace the contents with a deep copy of x.
+   void assign(const GooVector& x){
+     T *tmp=x._size ? new T[x._size] : 0;
+     T *end=tmp ? copy(x.storage,x.last,tmp) : 0;
+     delete [] storage;
+     _size=x._size; storage=tmp; last=end;
+   }
+
+public:
+ typedef T* iterator;
+
+ GooVector():_size(0),last(0),storage(0){}
+
+ // Deep copies: the implicit member-wise copy would leave two vectors
+ // calling delete[] on the same storage.
+ GooVector(const GooVector& x):_size(0),last(0),storage(0){ assign(x); }
+ GooVector& operator=(const GooVector& x){
+  if (this!=&x) assign(x);
+  return *this;
+ }
+
+ virtual ~GooVector(){ delete[] storage; }
+
+ // Drop all elements but keep the allocated storage.
+ void reset(){ last=storage; }
+
+ int size() const{ return (last-storage); }
+
+ // Append a copy of elem, growing the storage when it is full.
+ void push_back(const T& elem){
+  if (!storage||(size() >=_size)) resize();
+  *last=elem;
+  last++;
+ }
+
+ // Remove and return the last element.  An empty vector yields T()
+ // rather than reading through a null or stale pointer.
+ T pop_back() {
+  if (last==storage) return T();
+  last--;
+  return *last;
+ }
+
+ // Unchecked access: i must be less than size().
+ T operator[](unsigned int i) const{
+  return *(storage+i);
+ }
+
+ GBool isEmpty() const{
+  return !_size || (last==storage) ;
+ }
+
+ iterator begin() const{
+  return storage;
+ }
+
+ iterator end() const {
+  return last;
+ }
+};
+#endif
+
+
+
+   
+  
+  
+
+
+
diff --git a/utils/HtmlFonts.cc b/utils/HtmlFonts.cc
new file mode 100644
index 00000000..c77683b0
--- /dev/null
+++ b/utils/HtmlFonts.cc
@@ -0,0 +1,326 @@
+#include "HtmlFonts.h"
+#include "GlobalParams.h"
+#include "UnicodeMap.h"
+#include <stdio.h>
+
+ struct Fonts{
+    char *Fontname;
+    char *name;
+  };
+
+const int font_num=13;
+
+static Fonts fonts[font_num+1]={  
+     {"Courier",               "Courier" },
+     {"Courier-Bold",           "Courier"},
+     {"Courier-BoldOblique",    "Courier"},
+     {"Courier-Oblique",        "Courier"},
+     {"Helvetica",              "Helvetica"},
+     {"Helvetica-Bold",         "Helvetica"},
+     {"Helvetica-BoldOblique",  "Helvetica"},
+     {"Helvetica-Oblique",      "Helvetica"},
+     {"Symbol",                 "Symbol"   },
+     {"Times-Bold",             "Times"    },
+     {"Times-BoldItalic",       "Times"    },
+     {"Times-Italic",           "Times"    },
+     {"Times-Roman",            "Times"    },
+     {" "          ,            "Times"    },
+};
+
+#define xoutRound(x) ((int)((x) + 0.5)) /* argument parenthesised so compound expressions expand correctly */
+extern GBool xml;
+
+GooString* HtmlFont::DefaultFont=new GooString("Times"); // Arial,Helvetica,sans-serif
+
+HtmlFontColor::HtmlFontColor(GfxRGB rgb){
+  r=static_cast<int>(255*rgb.r);
+  g=static_cast<int>(255*rgb.g);
+  b=static_cast<int>(255*rgb.b);
+  if (!(Ok(r)&&Ok(b)&&Ok(g))) {printf("Error : Bad color \n");r=0;g=0;b=0;}
+}
+
+// Render xcol as two lowercase hex digits, high nibble first (meaningful
+// for xcol <= 255), in a newly allocated string the caller deletes.
+GooString *HtmlFontColor::convtoX(unsigned int xcol) const{
+  GooString *hex=new GooString();
+  const unsigned int nibbles[2]={ xcol/16, xcol%16 };
+  for (int n=0;n<2;++n) {
+    const unsigned int v=nibbles[n];
+    const char digit=(v<10) ? (char)('0'+v) : (char)('a'+v-10);
+    hex->append(digit);
+  }
+  return hex;
+}
+
+GooString *HtmlFontColor::toString() const{
+  GooString *tmp=new GooString("#");
+  GooString *tmpr=convtoX(r); 
+  GooString *tmpg=convtoX(g);
+  GooString *tmpb=convtoX(b);
+  tmp->append(tmpr);
+  tmp->append(tmpg);
+  tmp->append(tmpb);
+  delete tmpr;
+  delete tmpg;
+  delete tmpb;
+  return tmp;
+} 
+
+// Build a font record for the font name ftname (may be NULL).  The stored
+// size is _size-1; bold and italic are derived from the lowercased name.
+HtmlFont::HtmlFont(GooString* ftname,int _size, GfxRGB rgb){
+  color=HtmlFontColor(rgb);
+
+  lineSize = -1;
+  size=(_size-1);
+  italic = gFalse;
+  bold = gFalse;
+
+  // Fall back to the catch-all table entry.  The original left pos
+  // uninitialised when ftname was NULL, so isEqual(), getFontName() and
+  // isEqualIgnoreBold() read an indeterminate index.
+  pos = font_num;
+  FontName = NULL;
+
+  if( ftname ){
+    FontName=new GooString(ftname);
+
+    // Lowercase a scratch copy once (presumably lowerCase() edits the
+    // string in place -- confirm) instead of once per substring test.
+    GooString *fontname = new GooString(ftname);
+    const char *lower = fontname->lowerCase()->getCString();
+    if (strstr(lower,"bold"))  bold=gTrue;
+    if (strstr(lower,"italic")||
+        strstr(lower,"oblique"))  italic=gTrue;
+    delete fontname;
+
+    // Look the exact name up among the first font_num table entries;
+    // the index is checked before the entry is read.
+    int i=0;
+    while ((i<font_num)&&strcmp(ftname->getCString(),fonts[i].Fontname))
+    {
+      i++;
+    }
+    pos=i;
+  }
+  if (!DefaultFont) DefaultFont=new GooString(fonts[font_num].name);
+}
+ 
+// Deep copy; FontName is always set so the destructor never sees garbage.
+HtmlFont::HtmlFont(const HtmlFont& x){
+   size=x.size; lineSize=x.lineSize;
+   italic=x.italic; bold=x.bold;
+   pos=x.pos;
+   color=x.color;
+   // The original skipped this assignment when x.FontName was NULL.
+   FontName=x.FontName ? new GooString(x.FontName) : NULL;
+ }
+
+
+HtmlFont::~HtmlFont(){
+  if (FontName) delete FontName;
+}
+
+// Deep-copying assignment; self-assignment is a no-op.
+HtmlFont& HtmlFont::operator=(const HtmlFont& x){
+   if (this==&x) return *this; 
+   size=x.size; lineSize=x.lineSize;
+   italic=x.italic; bold=x.bold;
+   pos=x.pos; color=x.color;
+   // Reset FontName even when x has none: the original kept the pointer
+   // it had just deleted, which the destructor would then free again.
+   delete FontName;
+   FontName=x.FontName ? new GooString(x.FontName) : NULL;
+   return *this;
+}
+
+void HtmlFont::clear(){
+  if(DefaultFont) delete DefaultFont;
+  DefaultFont = NULL;
+}
+
+
+
+/*
+  This function is used to compare fonts for uniqueness before insertion into
+  the list of all encountered fonts
+*/
+GBool HtmlFont::isEqual(const HtmlFont& x) const{
+  return ((size==x.size) &&
+	  (lineSize==x.lineSize) &&
+	  (pos==x.pos) && (bold==x.bold) && (italic==x.italic) &&
+	  (color.isEqual(x.getColor())));
+}
+
+/*
+  This one is used to decide whether two pieces of text can be joined together
+  and therefore we don't care about bold/italics properties
+*/
+GBool HtmlFont::isEqualIgnoreBold(const HtmlFont& x) const{
+  return ((size==x.size) &&
+	  (!strcmp(fonts[pos].name, fonts[x.pos].name)) &&
+	  (color.isEqual(x.getColor())));
+}
+
+GooString* HtmlFont::getFontName(){
+   if (pos!=font_num) return new GooString(fonts[pos].name);
+    else return new GooString(DefaultFont);
+}
+
+GooString* HtmlFont::getFullName(){
+  if (FontName)
+    return new GooString(FontName);
+  else return new GooString(DefaultFont);
+} 
+
+void HtmlFont::setDefaultFont(GooString* defaultFont){
+  // Copy before deleting: getDefaultFont() hands out DefaultFont itself,
+  // so the argument may alias the string that is about to be freed.
+  GooString *copy=new GooString(defaultFont);
+  delete DefaultFont; DefaultFont=copy;
+}
+
+GooString* HtmlFont::getDefaultFont(){
+  return DefaultFont;
+}
+
+// TODO: this method is plain wrong (flagged by the original author, no details given)
+GooString* HtmlFont::HtmlFilter(Unicode* u, int uLen) {
+  GooString *tmp = new GooString();
+  UnicodeMap *uMap;
+  char buf[8];
+  int n;
+
+  // get the output encoding
+  if (!(uMap = globalParams->getTextEncoding())) {
+    return tmp;
+  }
+
+  for (int i = 0; i < uLen; ++i) {
+    switch (u[i])
+      { 
+	case '"': tmp->append("&quot;");  break;
+	case '&': tmp->append("&amp;");  break;
+	case '<': tmp->append("&lt;");  break;
+	case '>': tmp->append("&gt;");  break;
+	default:  
+	  {
+	    // convert unicode to string
+	    if ((n = uMap->mapUnicode(u[i], buf, sizeof(buf))) > 0) {
+	      tmp->append(buf, n); 
+	  }
+      }
+    }
+  }
+
+  uMap->decRefCnt();
+  return tmp;
+}
+
+GooString* HtmlFont::simple(HtmlFont* font, Unicode* content, int uLen){
+  GooString *cont=HtmlFilter (content, uLen); 
+
+  /*if (font.isBold()) {
+    cont->insert(0,"<b>",3);
+    cont->append("</b>",4);
+  }
+  if (font.isItalic()) {
+    cont->insert(0,"<i>",3);
+    cont->append("</i>",4);
+    } */
+
+  return cont;
+}
+
+HtmlFontAccu::HtmlFontAccu(){
+  accu=new GooVector<HtmlFont>();
+}
+
+HtmlFontAccu::~HtmlFontAccu(){
+  if (accu) delete accu;
+}
+
+int HtmlFontAccu::AddFont(const HtmlFont& font){
+ GooVector<HtmlFont>::iterator i; 
+ for (i=accu->begin();i!=accu->end();i++)
+ {
+	if (font.isEqual(*i)) 
+	{
+		return (int)(i-(accu->begin()));
+	}
+ }
+
+ accu->push_back(font);
+ return (accu->size()-1);
+}
+
+// get CSS font name for font #i 
+GooString* HtmlFontAccu::getCSStyle(int i, GooString* content){
+  GooString *tmp;
+  GooString *iStr=GooString::fromInt(i);
+  
+  if (!xml) {
+    tmp = new GooString("<span class=\"ft");
+    tmp->append(iStr);
+    tmp->append("\">");
+    tmp->append(content);
+    tmp->append("</span>");
+  } else {
+    tmp = new GooString("");
+    tmp->append(content);
+  }
+
+  delete iStr;
+  return tmp;
+}
+
+// get CSS font definition for font #i 
+GooString* HtmlFontAccu::CSStyle(int i){
+   GooString *tmp=new GooString();
+   GooString *iStr=GooString::fromInt(i);
+
+   GooVector<HtmlFont>::iterator g=accu->begin();
+   g+=i;
+   HtmlFont font=*g;
+   GooString *Size=GooString::fromInt(font.getSize());
+   GooString *colorStr=font.getColor().toString();
+   GooString *fontName=font.getFontName();
+   GooString *lSize;
+   
+   if(!xml){
+     tmp->append(".ft");
+     tmp->append(iStr);
+     tmp->append("{font-size:");
+     tmp->append(Size);
+     if( font.getLineSize() != -1 )
+     {
+	 lSize = GooString::fromInt(font.getLineSize());
+	 tmp->append("px;line-height:");
+	 tmp->append(lSize);
+	 delete lSize;
+     }
+     tmp->append("px;font-family:");
+     tmp->append(fontName); //font.getFontName());
+     tmp->append(";color:");
+     tmp->append(colorStr);
+     tmp->append(";}");
+   }
+   if (xml) {
+     tmp->append("<fontspec id=\"");
+     tmp->append(iStr);
+     tmp->append("\" size=\"");
+     tmp->append(Size);
+     tmp->append("\" family=\"");
+     tmp->append(fontName); //font.getFontName());
+     tmp->append("\" color=\"");
+     tmp->append(colorStr);
+     tmp->append("\"/>");
+   }
+
+   delete fontName;
+   delete colorStr;
+   delete iStr;
+   delete Size;
+   return tmp;
+}
+ 
+
diff --git a/utils/HtmlFonts.h b/utils/HtmlFonts.h
new file mode 100644
index 00000000..3ff5b81a
--- /dev/null
+++ b/utils/HtmlFonts.h
@@ -0,0 +1,85 @@
+#ifndef _HTML_FONTS_H
+#define _HTML_FONTS_H
+#include "goo/GooVector.h"
+#include "goo/GooString.h"
+#include "GfxState.h"
+#include "CharTypes.h"
+
+
+class HtmlFontColor{
+ private:
+   unsigned int r;
+   unsigned int g;
+   unsigned int b;
+   GBool Ok(unsigned int xcol){ return ((xcol<=255)&&(xcol>=0));}
+   GooString *convtoX(unsigned  int xcol) const;
+ public:
+   HtmlFontColor():r(0),g(0),b(0){}
+   HtmlFontColor(GfxRGB rgb);
+   HtmlFontColor(const HtmlFontColor& x){r=x.r;g=x.g;b=x.b;}
+   HtmlFontColor& operator=(const HtmlFontColor &x){
+     r=x.r;g=x.g;b=x.b;
+     return *this;
+   }
+   ~HtmlFontColor(){};
+   GooString* toString() const;
+   GBool isEqual(const HtmlFontColor& col) const{
+     return ((r==col.r)&&(g==col.g)&&(b==col.b));
+   }
+} ;  
+
+
+class HtmlFont{
+ private:
+   unsigned int size;
+   int lineSize;  // line height, or -1 when none has been set
+   GBool italic;
+   GBool bold;
+   int pos; // position of the font name in the fonts array
+   static GooString *DefaultFont;
+   GooString *FontName;  // owned copy of the full font name; may be NULL
+   HtmlFontColor color;
+   static GooString* HtmlFilter(Unicode* u, int uLen); //char* s);
+public:  
+   // Every member gets a defined value so a default-built object can be copied, compared and printed safely.
+   HtmlFont():size(0),lineSize(-1),italic(gFalse),bold(gFalse),pos(0),FontName(NULL){};
+   HtmlFont(GooString* fontname,int _size, GfxRGB rgb);
+   HtmlFont(const HtmlFont& x);
+   HtmlFont& operator=(const HtmlFont& x);
+   HtmlFontColor getColor() const {return color;}
+   ~HtmlFont();
+   static void clear();
+   GooString* getFullName();
+   GBool isItalic() const {return italic;}
+   GBool isBold() const {return bold;}
+   unsigned int getSize() const {return size;}
+   int getLineSize() const {return lineSize;}
+   void setLineSize(int _lineSize) { lineSize = _lineSize; }
+   GooString* getFontName();
+   static GooString* getDefaultFont();
+   static void setDefaultFont(GooString* defaultFont);
+   GBool isEqual(const HtmlFont& x) const;
+   GBool isEqualIgnoreBold(const HtmlFont& x) const;
+   static GooString* simple(HtmlFont *font, Unicode *content, int uLen);
+   void print() const {printf("font: %s %u %s%spos: %d\n", FontName ? FontName->getCString() : "(null)", size, bold ? "bold " : "", italic ? "italic " : "", pos);};
+};
+
+class HtmlFontAccu{
+private:
+  GooVector<HtmlFont> *accu;
+  
+public:
+  HtmlFontAccu();
+  ~HtmlFontAccu();
+  int AddFont(const HtmlFont& font);
+  HtmlFont* Get(int i){
+    GooVector<HtmlFont>::iterator g=accu->begin();
+    g+=i;  
+    return g;
+  } 
+  GooString* getCSStyle (int i, GooString* content);
+  GooString* CSStyle(int i);
+  int size() const {return accu->size();}
+  
+};  
+#endif
diff --git a/utils/HtmlLinks.cc b/utils/HtmlLinks.cc
new file mode 100644
index 00000000..3010be5e
--- /dev/null
+++ b/utils/HtmlLinks.cc
@@ -0,0 +1,101 @@
+#include "HtmlLinks.h"
+
+// Deep copy.  x.dest is NULL for a default-constructed link, so guard it
+// instead of handing a null pointer to the GooString copy constructor.
+HtmlLink::HtmlLink(const HtmlLink& x){
+  Xmin=x.Xmin; Ymin=x.Ymin;
+  Xmax=x.Xmax; Ymax=x.Ymax;
+  dest=x.dest ? new GooString(x.dest) : NULL;
+}
+
+HtmlLink::HtmlLink(double xmin,double ymin,double xmax,double ymax,GooString * _dest)
+{
+   if (xmin < xmax) {
+    Xmin=xmin;
+    Xmax=xmax;
+  } else {
+    Xmin=xmax;
+    Xmax=xmin;
+  }
+  if (ymin < ymax) {
+    Ymin=ymin;
+    Ymax=ymax;
+  } else {
+    Ymin=ymax;
+    Ymax=ymin;
+  }                    
+  dest=new GooString(_dest);
+}
+
+HtmlLink::~HtmlLink(){
+ if (dest) delete dest;
+}
+
+GBool HtmlLink::isEqualDest(const HtmlLink& x) const{
+  return (!strcmp(dest->getCString(), x.dest->getCString()));
+}
+
+GBool HtmlLink::inLink(double xmin,double ymin,double xmax,double ymax) const {
+  double y=(ymin+ymax)/2;
+  if (y>Ymax) return gFalse;
+  return (y>Ymin)&&(xmin<Xmax)&&(xmax>Xmin);
+ }
+  
+
+// Deep-copying assignment; self-assignment is a no-op.
+HtmlLink& HtmlLink::operator=(const HtmlLink& x){
+  if (this==&x) return *this;
+  Xmin=x.Xmin; Ymin=x.Ymin;
+  Xmax=x.Xmax; Ymax=x.Ymax;
+  delete dest;
+  // x.dest is NULL for a default-constructed link; copy only a real one.
+  dest=x.dest ? new GooString(x.dest) : NULL;
+  return *this;
+}
+
+GooString* HtmlLink::getLinkStart() {
+  GooString *res = new GooString("<A href=\"");
+  res->append(dest);
+  res->append("\">");
+  return res;
+}
+
+/*GooString* HtmlLink::Link(GooString* content){
+  //GooString* _dest=new GooString(dest);
+  GooString *tmp=new GooString("<a href=\"");
+  tmp->append(dest);
+  tmp->append("\">");
+  tmp->append(content);
+  tmp->append("</a>");
+  //delete _dest;
+  return tmp;
+  }*/
+
+   
+
+HtmlLinks::HtmlLinks(){
+  accu=new GooVector<HtmlLink>();
+}
+
+HtmlLinks::~HtmlLinks(){
+  delete accu;
+  accu=NULL; 
+}
+
+GBool HtmlLinks::inLink(double xmin,double ymin,double xmax,double ymax,int& p)const {
+  
+  for(GooVector<HtmlLink>::iterator i=accu->begin();i!=accu->end();i++){
+    if (i->inLink(xmin,ymin,xmax,ymax)) {
+        p=(i - accu->begin());
+        return 1;
+    }
+   }
+  return 0;
+}
+
+HtmlLink* HtmlLinks::getLink(int i) const{
+  GooVector<HtmlLink>::iterator g=accu->begin();
+  g+=i; 
+  return g;
+}
+
diff --git a/utils/HtmlLinks.h b/utils/HtmlLinks.h
new file mode 100644
index 00000000..71f8065e
--- /dev/null
+++ b/utils/HtmlLinks.h
@@ -0,0 +1,49 @@
+#ifndef _HTML_LINKS
+#define _HTML_LINKS
+
+#include <stdlib.h>
+#include <string.h>
+#include "goo/GooVector.h"
+#include "goo/GooString.h"
+
+class HtmlLink{
+
+private:  
+  double Xmin;
+  double Ymin;
+  double Xmax;
+  double Ymax;
+  GooString* dest;
+
+public:
+  HtmlLink(){dest=NULL;}
+  HtmlLink(const HtmlLink& x);
+  HtmlLink& operator=(const HtmlLink& x);
+  HtmlLink(double xmin,double ymin,double xmax,double ymax,GooString *_dest);
+  ~HtmlLink();
+  GBool isEqualDest(const HtmlLink& x) const;
+  GooString *getDest(){return new GooString(dest);}
+  double getX1() const {return Xmin;}
+  double getX2() const {return Xmax;}
+  double getY1() const {return Ymin;}
+  double getY2() const {return Ymax;}
+  GBool inLink(double xmin,double ymin,double xmax,double ymax) const ;
+  //GooString *Link(GooString *content);
+  GooString* getLinkStart();
+  
+};
+
+class HtmlLinks{
+private:
+ GooVector<HtmlLink> *accu;
+public:
+ HtmlLinks();
+ ~HtmlLinks();
+ void AddLink(const HtmlLink& x) {accu->push_back(x);}
+ GBool inLink(double xmin,double ymin,double xmax,double ymax,int& p) const;
+ HtmlLink* getLink(int i) const;
+
+};
+
+#endif
+   
diff --git a/utils/HtmlOutputDev.cc b/utils/HtmlOutputDev.cc
new file mode 100644
index 00000000..fb8b66aa
--- /dev/null
+++ b/utils/HtmlOutputDev.cc
@@ -0,0 +1,1569 @@
+//========================================================================
+//
+// HtmlOutputDev.cc
+//
+// Copyright 1997-2002 Glyph & Cog, LLC
+//
+// Changed 1999-2000 by G.Ovtcharov
+//
+// Changed 2002 by Mikhail Kruk
+//
+//========================================================================
+
+#ifdef __GNUC__
+#pragma implementation
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <stddef.h>
+#include <ctype.h>
+#include <math.h>
+#include "goo/GooString.h"
+#include "goo/GooList.h"
+#include "UnicodeMap.h"
+#include "goo/gmem.h"
+#include "config.h"
+#include "Error.h"
+#include "GfxState.h"
+#include "DCTStream.h"
+#include "GlobalParams.h"
+#include "HtmlOutputDev.h"
+#include "HtmlFonts.h"
+
+int HtmlPage::pgNum=0;
+int HtmlOutputDev::imgNum=1;
+
+extern double scale;
+extern GBool complexMode;
+extern GBool ignore;
+extern GBool printCommands;
+extern GBool printHtml;
+extern GBool noframes;
+extern GBool stout;
+extern GBool xml;
+extern GBool showHidden;
+extern GBool noMerge;
+
+// Return a new string holding what follows the last SLASH in str, or a
+// copy of str when it contains no SLASH.
+static GooString* basename(GooString* str){
+  char *chars=str->getCString();
+  int n=str->getLength();
+  int cut=n-1;
+  while (cut>=0 && chars[cut]!=SLASH) --cut;
+  return (cut>=0) ? new GooString(chars+cut+1,n-cut-1) : new GooString(str);
+}
+
+// Return a new string holding str up to and including its last SLASH, or
+// an empty string when it contains no SLASH.
+static GooString* Dirname(GooString* str){
+  char *chars=str->getCString();
+  int cut=str->getLength()-1;
+  while (cut>=0 && chars[cut]!=SLASH) --cut;
+  if (cut<0) return new GooString();
+  return new GooString(chars,cut+1);
+}
+
+//------------------------------------------------------------------------
+// HtmlString
+//------------------------------------------------------------------------
+
+HtmlString::HtmlString(GfxState *state, double fontSize, HtmlFontAccu* fonts) {
+  GfxFont *font;
+  double x, y;
+
+  state->transform(state->getCurX(), state->getCurY(), &x, &y);
+  if ((font = state->getFont())) {
+    yMin = y - font->getAscent() * fontSize;
+    yMax = y - font->getDescent() * fontSize;
+    GfxRGB rgb;
+    state->getFillRGB(&rgb);
+    GooString *name = state->getFont()->getName();
+    if (!name) name = HtmlFont::getDefaultFont(); //new GooString("default");
+    HtmlFont hfont=HtmlFont(name, static_cast<int>(fontSize-1), rgb);
+    fontpos = fonts->AddFont(hfont);
+  } else {
+    // this means that the PDF file draws text without a current font,
+    // which should never happen
+    yMin = y - 0.95 * fontSize;
+    yMax = y + 0.35 * fontSize;
+    fontpos=0;
+  }
+  if (yMin == yMax) {
+    // this is a sanity check for a case that shouldn't happen -- but
+    // if it does happen, we want to avoid dividing by zero later
+    yMin = y;
+    yMax = y + 1;
+  }
+  col = 0;
+  text = NULL;
+  xRight = NULL;
+  link = NULL;
+  len = size = 0;
+  yxNext = NULL;
+  xyNext = NULL;
+  htext=new GooString();
+  dir = textDirUnknown;
+}
+
+
+HtmlString::~HtmlString() {
+  gfree(text);    // text is grown with grealloc() in addChar(), so it must not go through delete
+  delete htext;   // htext is created with new in the constructor
+  gfree(xRight);  // xRight is grown with grealloc() alongside text
+}
+
+void HtmlString::addChar(GfxState *state, double x, double y,
+			 double dx, double dy, Unicode u) {
+  if (dir == textDirUnknown) {
+    //dir = UnicodeMap::getDirection(u);
+    dir = textDirLeftRight;
+  } 
+
+  if (len == size) {
+    size += 16;
+    text = (Unicode *)grealloc(text, size * sizeof(Unicode));
+    xRight = (double *)grealloc(xRight, size * sizeof(double));
+  }
+  text[len] = u;
+  if (len == 0) {
+    xMin = x;
+  }
+  xMax = xRight[len] = x + dx;
+//printf("added char: %f %f xright = %f\n", x, dx, x+dx);
+  ++len;
+}
+
+// Finish the string: for right-to-left text, reverse the code points in
+// place.  Strings of fewer than two characters are left untouched.
+void HtmlString::endString()
+{
+  if( dir != textDirRightLeft || len <= 1 ) return;
+
+  for (int lo = 0, hi = len - 1; lo < hi; ++lo, --hi)
+  {
+    Unicode swapped = text[lo];
+    text[lo] = text[hi];
+    text[hi] = swapped;
+  }
+}
+
+//------------------------------------------------------------------------
+// HtmlPage
+//------------------------------------------------------------------------
+
+HtmlPage::HtmlPage(GBool rawOrder, char *imgExtVal) {
+  this->rawOrder = rawOrder;
+  curStr = NULL;
+  yxStrings = NULL;
+  xyStrings = NULL;
+  yxCur1 = yxCur2 = NULL;
+  fonts=new HtmlFontAccu();
+  links=new HtmlLinks();
+  pageWidth=0;
+  pageHeight=0;
+  fontsPageMarker = 0;
+  DocName=NULL;
+  firstPage = -1;
+  imgExt = new GooString(imgExtVal);
+}
+
+HtmlPage::~HtmlPage() {
+  clear();
+  // delete on a null pointer is a no-op, so the members need no guards
+  delete DocName; delete fonts;
+  delete links;
+  delete imgExt;
+}
+
+void HtmlPage::updateFont(GfxState *state) {
+  GfxFont *font;
+  double *fm;
+  char *name;
+  int code;
+  double w;
+  
+  // adjust the font size
+  fontSize = state->getTransformedFontSize();
+  if ((font = state->getFont()) && font->getType() == fontType3) {
+    // This is a hack which makes it possible to deal with some Type 3
+    // fonts.  The problem is that it's impossible to know what the
+    // base coordinate system used in the font is without actually
+    // rendering the font.  This code tries to guess by looking at the
+    // width of the character 'm' (which breaks if the font is a
+    // subset that doesn't contain 'm').
+    for (code = 0; code < 256; ++code) {
+      if ((name = ((Gfx8BitFont *)font)->getCharName(code)) &&
+	  name[0] == 'm' && name[1] == '\0') {
+	break;
+      }
+    }
+    if (code < 256) {
+      w = ((Gfx8BitFont *)font)->getWidth(code);
+      if (w != 0) {
+	// 600 is a generic average 'm' width -- yes, this is a hack
+	fontSize *= w / 0.6;
+      }
+    }
+    fm = font->getFontMatrix();
+    if (fm[0] != 0) {
+      fontSize *= fabs(fm[3] / fm[0]);
+    }
+  }
+}
+
+void HtmlPage::beginString(GfxState *state, GooString *s) {
+  curStr = new HtmlString(state, fontSize, fonts);
+}
+
+
+void HtmlPage::conv(){
+  HtmlString *tmp;
+
+  int linkIndex = 0;
+  HtmlFont* h;
+  for(tmp=yxStrings;tmp;tmp=tmp->yxNext){
+     int pos=tmp->fontpos;
+     //  printf("%d\n",pos);
+     h=fonts->Get(pos);
+
+     if (tmp->htext) delete tmp->htext; 
+     tmp->htext=HtmlFont::simple(h,tmp->text,tmp->len);
+
+     if (links->inLink(tmp->xMin,tmp->yMin,tmp->xMax,tmp->yMax, linkIndex)){
+       tmp->link = links->getLink(linkIndex);
+       /*GooString *t=tmp->htext;
+       tmp->htext=links->getLink(k)->Link(tmp->htext);
+       delete t;*/
+     }
+  }
+
+}
+
+
+void HtmlPage::addChar(GfxState *state, double x, double y,
+		       double dx, double dy, 
+			double ox, double oy, Unicode *u, int uLen) {
+  double x1, y1, w1, h1, dx2, dy2;
+  int n, i;
+  state->transform(x, y, &x1, &y1);
+  n = curStr->len;
+ 
+  // check that new character is in the same direction as current string
+  // and is not too far away from it before adding 
+  //if ((UnicodeMap::getDirection(u[0]) != curStr->dir) || 
+  // XXX
+  if (
+     (n > 0 && 
+      fabs(x1 - curStr->xRight[n-1]) > 0.1 * (curStr->yMax - curStr->yMin))) {
+    endString();
+    beginString(state, NULL);
+  }
+  state->textTransformDelta(state->getCharSpace() * state->getHorizScaling(),
+			    0, &dx2, &dy2);
+  dx -= dx2;
+  dy -= dy2;
+  state->transformDelta(dx, dy, &w1, &h1);
+  if (uLen != 0) {
+    w1 /= uLen;
+    h1 /= uLen;
+  }
+  for (i = 0; i < uLen; ++i) {
+    curStr->addChar(state, x1 + i*w1, y1 + i*h1, w1, h1, u[i]);
+  }
+}
+
+void HtmlPage::endString() {
+  HtmlString *p1, *p2;
+  double h, y1, y2;
+
+  // throw away zero-length strings -- they don't have valid xMin/xMax
+  // values, and they're useless anyway
+  if (curStr->len == 0) {
+    delete curStr;
+    curStr = NULL;
+    return;
+  }
+
+  curStr->endString();
+
+#if 0 //~tmp
+  if (curStr->yMax - curStr->yMin > 20) {
+    delete curStr;
+    curStr = NULL;
+    return;
+  }
+#endif
+
+  // insert string in y-major list
+  h = curStr->yMax - curStr->yMin;
+  y1 = curStr->yMin + 0.5 * h;
+  y2 = curStr->yMin + 0.8 * h;
+  if (rawOrder) {
+    p1 = yxCur1;
+    p2 = NULL;
+  } else if ((!yxCur1 ||
+              (y1 >= yxCur1->yMin &&
+               (y2 >= yxCur1->yMax || curStr->xMax >= yxCur1->xMin))) &&
+             (!yxCur2 ||
+              (y1 < yxCur2->yMin ||
+               (y2 < yxCur2->yMax && curStr->xMax < yxCur2->xMin)))) {
+    p1 = yxCur1;
+    p2 = yxCur2;
+  } else {
+    for (p1 = NULL, p2 = yxStrings; p2; p1 = p2, p2 = p2->yxNext) {
+      if (y1 < p2->yMin || (y2 < p2->yMax && curStr->xMax < p2->xMin))
+        break;
+    }
+    yxCur2 = p2;
+  }
+  yxCur1 = curStr;
+  if (p1)
+    p1->yxNext = curStr;
+  else
+    yxStrings = curStr;
+  curStr->yxNext = p2;
+  curStr = NULL;
+}
+
+void HtmlPage::coalesce() {
+  HtmlString *str1, *str2;
+  HtmlFont *hfont1, *hfont2;
+  double space, horSpace, vertSpace, vertOverlap;
+  GBool addSpace, addLineBreak;
+  int n, i;
+  double curX, curY;
+
+#if 0 //~ for debugging
+  for (str1 = yxStrings; str1; str1 = str1->yxNext) {
+    printf("x=%f..%f  y=%f..%f  size=%2d '",
+	   str1->xMin, str1->xMax, str1->yMin, str1->yMax,
+	   (int)(str1->yMax - str1->yMin));
+    for (i = 0; i < str1->len; ++i) {
+      fputc(str1->text[i] & 0xff, stdout);
+    }
+    printf("'\n");
+  }
+  printf("\n------------------------------------------------------------\n\n");
+#endif
+  str1 = yxStrings;
+
+  if( !str1 ) return;
+
+  //----- discard duplicated text (fake boldface, drop shadows)
+  if( !complexMode )
+  {	/* if not in complex mode get rid of duplicate strings */
+	HtmlString *str3;
+	GBool found;
+  	while (str1)
+	{
+		double size = str1->yMax - str1->yMin;
+		double xLimit = str1->xMin + size * 0.2;
+		found = gFalse;
+		for (str2 = str1, str3 = str1->yxNext;
+			str3 && str3->xMin < xLimit;
+			str2 = str3, str3 = str2->yxNext)
+		{
+			if (str3->len == str1->len &&
+				!memcmp(str3->text, str1->text, str1->len * sizeof(Unicode)) &&
+				fabs(str3->yMin - str1->yMin) < size * 0.2 &&
+				fabs(str3->yMax - str1->yMax) < size * 0.2 &&
+				fabs(str3->xMax - str1->xMax) < size * 0.2)
+			{
+				found = gTrue;
+				//printf("found duplicate!\n");
+				break;
+			}
+		}
+		if (found)
+		{
+			str2->xyNext = str3->xyNext;
+			str2->yxNext = str3->yxNext;
+			delete str3;
+		}
+		else
+		{
+			str1 = str1->yxNext;
+		}
+	}		
+  }	/*- !complexMode */
+  
+  str1 = yxStrings;
+  
+  hfont1 = getFont(str1);
+  if( hfont1->isBold() )
+    str1->htext->insert(0,"<b>",3);
+  if( hfont1->isItalic() )
+    str1->htext->insert(0,"<i>",3);
+  if( str1->getLink() != NULL ) {
+    GooString *ls = str1->getLink()->getLinkStart();
+    str1->htext->insert(0, ls);
+    delete ls;
+  }
+  curX = str1->xMin; curY = str1->yMin;
+
+  while (str1 && (str2 = str1->yxNext)) {
+    hfont2 = getFont(str2);
+    space = str1->yMax - str1->yMin;
+    horSpace = str2->xMin - str1->xMax;
+    addLineBreak = !noMerge && (fabs(str1->xMin - str2->xMin) < 0.4);
+    vertSpace = str2->yMin - str1->yMax;
+
+//printf("coalesce %d %d %f? ", str1->dir, str2->dir, d);
+
+    if (str2->yMin >= str1->yMin && str2->yMin <= str1->yMax)
+    {
+	vertOverlap = str1->yMax - str2->yMin;
+    } else
+    if (str2->yMax >= str1->yMin && str2->yMax <= str1->yMax)
+    {
+	vertOverlap = str2->yMax - str1->yMin;
+    } else
+    {
+    	vertOverlap = 0;
+    } 
+    
+    if (
+	(
+	 (
+	  (
+	   (rawOrder && vertOverlap > 0.5 * space) 
+	   ||
+	   (!rawOrder && str2->yMin < str1->yMax)
+	  ) &&
+	  (horSpace > -0.5 * space && horSpace < space)
+	 ) ||
+       	 (vertSpace >= 0 && vertSpace < 0.5 * space && addLineBreak)
+	) &&
+	(!complexMode || (hfont1->isEqualIgnoreBold(*hfont2))) && // in complex mode fonts must be the same, in other modes fonts do not metter
+	str1->dir == str2->dir // text direction the same
+       ) 
+    {
+//      printf("yes\n");
+      n = str1->len + str2->len;
+      if ((addSpace = horSpace > 0.1 * space)) {
+        ++n;
+      }
+      if (addLineBreak) {
+        ++n;
+      }
+  
+      str1->size = (n + 15) & ~15;
+      str1->text = (Unicode *)grealloc(str1->text,
+				       str1->size * sizeof(Unicode));
+      str1->xRight = (double *)grealloc(str1->xRight,
+					str1->size * sizeof(double));
+      if (addSpace) {
+		  str1->text[str1->len] = 0x20;
+		  str1->htext->append(" ");
+		  str1->xRight[str1->len] = str2->xMin;
+		  ++str1->len;
+      }
+      if (addLineBreak) {
+	  str1->text[str1->len] = '\n';
+	  str1->htext->append("<br>");
+	  str1->xRight[str1->len] = str2->xMin;
+	  ++str1->len;
+	  str1->yMin = str2->yMin;
+	  str1->yMax = str2->yMax;
+	  str1->xMax = str2->xMax;
+	  int fontLineSize = hfont1->getLineSize();
+	  int curLineSize = (int)(vertSpace + space); 
+	  if( curLineSize != fontLineSize )
+	  {
+	      HtmlFont *newfnt = new HtmlFont(*hfont1);
+	      newfnt->setLineSize(curLineSize);
+	      str1->fontpos = fonts->AddFont(*newfnt);
+	      delete newfnt;
+	      hfont1 = getFont(str1);
+	      // we have to reget hfont2 because it's location could have
+	      // changed on resize
+	      hfont2 = getFont(str2); 
+	  }
+      }
+      for (i = 0; i < str2->len; ++i) {
+	str1->text[str1->len] = str2->text[i];
+	str1->xRight[str1->len] = str2->xRight[i];
+	++str1->len;
+      }
+
+      /* fix <i> and <b> if str1 and str2 differ */
+      if( hfont1->isBold() && !hfont2->isBold() )
+	str1->htext->append("</b>", 4);
+      if( hfont1->isItalic() && !hfont2->isItalic() )
+	str1->htext->append("</i>", 4);
+      if( !hfont1->isBold() && hfont2->isBold() )
+	str1->htext->append("<b>", 3);
+      if( !hfont1->isItalic() && hfont2->isItalic() )
+	str1->htext->append("<i>", 3);
+
+      /* now handle switch of links */
+      HtmlLink *hlink1 = str1->getLink();
+      HtmlLink *hlink2 = str2->getLink();
+      if( !hlink1 || !hlink2 || !hlink1->isEqualDest(*hlink2) ) {
+	if(hlink1 != NULL )
+	  str1->htext->append("</a>");
+	if(hlink2 != NULL ) {
+	  GooString *ls = hlink2->getLinkStart();
+	  str1->htext->append(ls);
+	  delete ls;
+	}
+      }
+
+      str1->htext->append(str2->htext);
+      // str1 now contains href for link of str2 (if it is defined)
+      str1->link = str2->link; 
+      hfont1 = hfont2;
+      if (str2->xMax > str1->xMax) {
+	str1->xMax = str2->xMax;
+      }
+      if (str2->yMax > str1->yMax) {
+	str1->yMax = str2->yMax;
+      }
+      str1->yxNext = str2->yxNext;
+      delete str2;
+    } else { // keep strings separate
+//      printf("no\n"); 
+      if( hfont1->isBold() )
+	str1->htext->append("</b>",4);
+      if( hfont1->isItalic() )
+	str1->htext->append("</i>",4);
+      if(str1->getLink() != NULL )
+	str1->htext->append("</a>");
+     
+      str1->xMin = curX; str1->yMin = curY; 
+      str1 = str2;
+      curX = str1->xMin; curY = str1->yMin;
+      hfont1 = hfont2;
+      if( hfont1->isBold() )
+	str1->htext->insert(0,"<b>",3);
+      if( hfont1->isItalic() )
+	str1->htext->insert(0,"<i>",3);
+      if( str1->getLink() != NULL ) {
+	GooString *ls = str1->getLink()->getLinkStart();
+	str1->htext->insert(0, ls);
+	delete ls;
+      }
+    }
+  }
+  str1->xMin = curX; str1->yMin = curY;
+  if( hfont1->isBold() )
+    str1->htext->append("</b>",4);
+  if( hfont1->isItalic() )
+    str1->htext->append("</i>",4);
+  if(str1->getLink() != NULL )
+    str1->htext->append("</a>");
+
+#if 0 //~ for debugging
+  for (str1 = yxStrings; str1; str1 = str1->yxNext) {
+    printf("x=%3d..%3d  y=%3d..%3d  size=%2d ",
+	   (int)str1->xMin, (int)str1->xMax, (int)str1->yMin, (int)str1->yMax,
+	   (int)(str1->yMax - str1->yMin));
+    printf("'%s'\n", str1->htext->getCString());  
+  }
+  printf("\n------------------------------------------------------------\n\n");
+#endif
+
+}
+
+// Write the current page to f as a <page> element: the page header, one
+// line per font registered since fontsPageMarker, and one <text> element
+// for every string in the yxStrings list that has HTML text.
+//
+//   f    - output stream
+//   page - page number written into the "number" attribute
+void HtmlPage::dumpAsXML(FILE* f,int page){
+  fprintf(f, "<page number=\"%d\" position=\"absolute\"", page);
+  fprintf(f," top=\"0\" left=\"0\" height=\"%d\" width=\"%d\">\n", pageHeight,pageWidth);
+
+  for(int i=fontsPageMarker;i < fonts->size();i++) {
+    GooString *fontCSStyle = fonts->CSStyle(i);
+    fprintf(f,"\t%s\n",fontCSStyle->getCString());
+    delete fontCSStyle;
+  }
+
+  for(HtmlString *tmp=yxStrings;tmp;tmp=tmp->yxNext){
+    if (!tmp->htext)
+      continue;
+
+    GooString *str=new GooString(tmp->htext);
+    // Styled version of str; stays NULL when the string has no font entry.
+    GooString *str1=NULL;
+
+    fprintf(f,"<text top=\"%d\" left=\"%d\" ",xoutRound(tmp->yMin),xoutRound(tmp->xMin));
+    fprintf(f,"width=\"%d\" height=\"%d\" ",xoutRound(tmp->xMax-tmp->xMin),xoutRound(tmp->yMax-tmp->yMin));
+    fprintf(f,"font=\"%d\">", tmp->fontpos);
+    if (tmp->fontpos!=-1){
+      str1=fonts->getCSStyle(tmp->fontpos, str);
+    }
+    // With fontpos == -1 there is no styled string; emit the plain text
+    // instead of reading (and later deleting) an unset pointer.
+    fputs((str1 ? str1 : str)->getCString(),f);
+    delete str1;
+    delete str;
+    fputs("</text>\n",f);
+  }
+  fputs("</page>\n",f);
+}
+
+
+// Write the current page as absolutely positioned HTML.
+//
+// When noframes is false the page goes to its own "<DocName>-<page>.html"
+// file, opened under the name returned by getFileNameFromPath(); a failure
+// to open it is reported through error() and the page is skipped.  When
+// noframes is true the markup is appended to file instead.
+//
+//   file - shared output stream, used only when noframes is true
+//   page - page number (the first one seen is remembered in firstPage)
+void HtmlPage::dumpComplex(FILE *file, int page){
+  FILE* pageFile;
+  GooString* tmp;
+  char* htmlEncoding;
+
+  if( firstPage == -1 ) firstPage = page;
+
+  if( !noframes )
+  {
+      GooString* pgNum=GooString::fromInt(page);
+      tmp = new GooString(DocName);
+      tmp->append('-')->append(pgNum)->append(".html");
+      delete pgNum;
+
+      // getFileNameFromPath() hands back a malloc'ed buffer that the
+      // caller owns; release it as soon as fopen() has used it.
+      char* pageFileName = getFileNameFromPath(tmp->getCString(),tmp->getLength());
+      pageFile = fopen(pageFileName, "w");
+      free(pageFileName);
+      if (!pageFile) {
+	  error(-1, "Couldn't open html file '%s'", tmp->getCString());
+	  delete tmp;
+	  return;
+      }
+      delete tmp;
+
+      fprintf(pageFile,"%s\n<HTML>\n<HEAD>\n<TITLE>Page %d</TITLE>\n\n",
+	      DOCTYPE, page);
+
+      htmlEncoding = HtmlOutputDev::mapEncodingToHtml
+	  (globalParams->getTextEncodingName());
+      fprintf(pageFile, "<META http-equiv=\"Content-Type\" content=\"text/html; charset=%s\">\n", htmlEncoding);
+  }
+  else
+  {
+      pageFile = file;
+      fprintf(pageFile,"<!-- Page %d -->\n", page);
+      fprintf(pageFile,"<a name=\"%d\"></a>\n", page);
+  }
+
+  fprintf(pageFile,"<DIV style=\"position:relative;width:%d;height:%d;\">\n",
+	pageWidth, pageHeight);
+
+  tmp=basename(DocName);
+
+  // Style sheet entries for the fonts registered since fontsPageMarker.
+  fputs("<STYLE type=\"text/css\">\n<!--\n",pageFile);
+  for(int i=fontsPageMarker;i!=fonts->size();i++) {
+    GooString *fontCSStyle = fonts->CSStyle(i);
+    fprintf(pageFile,"\t%s\n",fontCSStyle->getCString());
+    delete fontCSStyle;
+  }
+
+  fputs("-->\n</STYLE>\n",pageFile);
+
+  if( !noframes )
+  {
+      fputs("</HEAD>\n<BODY bgcolor=\"#A0A0A0\" vlink=\"blue\" link=\"blue\">\n",pageFile);
+  }
+
+  if( !ignore )
+  {
+    // Background image, numbered relative to the first dumped page.
+    fprintf(pageFile,
+	    "<IMG width=\"%d\" height=\"%d\" src=\"%s%03d.%s\" alt=\"background image\">\n",
+	    pageWidth, pageHeight, tmp->getCString(),
+		(page-firstPage+1), imgExt->getCString());
+  }
+
+  delete tmp;
+
+  for(HtmlString *tmp1=yxStrings;tmp1;tmp1=tmp1->yxNext){
+    if (!tmp1->htext)
+      continue;
+
+    GooString *str=new GooString(tmp1->htext);
+    // Styled version of str; stays NULL when the string has no font entry.
+    GooString *str1=NULL;
+
+    fprintf(pageFile,
+	    "<DIV style=\"position:absolute;top:%d;left:%d\">",
+	    xoutRound(tmp1->yMin),
+	    xoutRound(tmp1->xMin));
+    fputs("<nobr>",pageFile);
+    if (tmp1->fontpos!=-1){
+      str1=fonts->getCSStyle(tmp1->fontpos, str);
+    }
+    // With fontpos == -1 there is no styled string; emit the plain text
+    // instead of reading (and later deleting) an unset pointer.
+    fputs((str1 ? str1 : str)->getCString(),pageFile);
+
+    delete str1;
+    delete str;
+    fputs("</nobr></DIV>\n",pageFile);
+  }
+
+  fputs("</DIV>\n", pageFile);
+
+  if( !noframes )
+  {
+      fputs("</BODY>\n</HTML>\n",pageFile);
+      fclose(pageFile);
+  }
+}
+
+
+// Emit the current page to f.  In complex mode the work is delegated to
+// dumpAsXML() or dumpComplex() depending on the xml flag; otherwise a
+// simple rendering is written: a page anchor, an <IMG> tag for each image
+// counted by HtmlOutputDev::imgNum, every string's HTML text followed by
+// <br>, and a closing <hr>.
+void HtmlPage::dump(FILE *f, int pageNum)
+{
+  if (complexMode)
+  {
+    if (xml)
+      dumpAsXML(f, pageNum);
+    else
+      dumpComplex(f, pageNum);
+    return;
+  }
+
+  fprintf(f,"<A name=%d></a>",pageNum);
+
+  // Image indices start at 1; imgNum is one past the last image dumped.
+  GooString* docBase=basename(DocName);
+  for (int img=1;img<HtmlOutputDev::imgNum;img++)
+    fprintf(f,"<IMG src=\"%s-%d_%d.jpg\"><br>\n",docBase->getCString(),pageNum,img);
+  HtmlOutputDev::imgNum=1;
+  delete docBase;
+
+  HtmlString *cur=yxStrings;
+  while (cur){
+    if (cur->htext){
+      GooString* text=new GooString(cur->htext);
+      fputs(text->getCString(),f);
+      delete text;
+      fputs("<br>\n",f);
+    }
+    cur=cur->yxNext;
+  }
+  fputs("<hr>\n",f);
+}
+
+
+
+// Drop all per-page state: the string being built, every string on the
+// yxStrings list, and the link list.  The font accumulator is replaced
+// when frames are in use; with noframes it is kept and fontsPageMarker is
+// moved to its current size.
+void HtmlPage::clear() {
+  // delete on a null pointer is a no-op, so no guard is needed.
+  delete curStr;
+  curStr = NULL;
+
+  HtmlString *node = yxStrings;
+  while (node) {
+    HtmlString *following = node->yxNext;
+    delete node;
+    node = following;
+  }
+  yxStrings = NULL;
+  xyStrings = NULL;
+  yxCur1 = NULL;
+  yxCur2 = NULL;
+
+  if( noframes )
+  {
+      fontsPageMarker = fonts->size();
+  }
+  else
+  {
+      delete fonts;
+      fonts=new HtmlFontAccu();
+      fontsPageMarker = 0;
+  }
+
+  delete links;
+  links=new HtmlLinks();
+}
+
+// Store a private copy of fname as the document name.
+// NOTE(review): any previous DocName is not released here - confirm this
+// is only called once per HtmlPage.
+void HtmlPage::setDocName(char *fname)
+{
+  GooString *copy = new GooString(fname);
+  DocName = copy;
+}
+
+//------------------------------------------------------------------------
+// HtmlMetaVar
+//------------------------------------------------------------------------
+
+// Build a META variable holding private copies of the given name and
+// content strings.
+HtmlMetaVar::HtmlMetaVar(char *_name, char *_content)
+  : name(new GooString(_name)),
+    content(new GooString(_content))
+{
+}
+
+// Release the two strings allocated by the constructor.
+HtmlMetaVar::~HtmlMetaVar()
+{
+   delete content;
+   delete name;
+}
+    
+// Return a newly allocated string of the form
+//   <META name="NAME" content="CONTENT">
+// The caller owns (and must delete) the result.
+GooString* HtmlMetaVar::toString()
+{
+    GooString *tag = new GooString("<META name=\"");
+    tag->append(name)->append("\" content=\"")->append(content)->append("\">");
+    return tag;
+}
+
+//------------------------------------------------------------------------
+// HtmlOutputDev
+//------------------------------------------------------------------------
+
+// Lookup table used by HtmlOutputDev::mapEncodingToHtml(): each row pairs
+// an encoding name (column 0) with the charset name written into the HTML
+// output (column 1).  The {NULL, NULL} row terminates the table.
+static char* HtmlEncodings[][2] = {
+    {"Latin1", "ISO-8859-1"},
+    {NULL, NULL}
+};
+
+
+// Translate an encoding name into the charset name used in HTML output.
+// Returns the matching HtmlEncodings entry, or the caller's own string
+// (encoding's character buffer) when the name is not in the table.
+char* HtmlOutputDev::mapEncodingToHtml(GooString* encoding)
+{
+    char* requested = encoding->getCString();
+    int row = 0;
+    while( HtmlEncodings[row][0] != NULL )
+    {
+	if( !strcmp(requested, HtmlEncodings[row][0]) )
+	    return HtmlEncodings[row][1];
+	++row;
+    }
+    return requested;
+}
+
+// Write the top-level frameset document "<Docname>.html": a narrow
+// "links" frame showing "<basename>_ind.html" and a "contents" frame
+// showing either the first page file (complex mode) or "<basename>s.html".
+// The member fContentsFrame is borrowed as the stream for this file and is
+// reset to NULL afterwards.  On failure to open the file the error is
+// reported through error() and nothing is written.
+//
+//   firstPage - page number used for the initial contents frame in
+//               complex mode
+void HtmlOutputDev::doFrame(int firstPage){
+  GooString* fName=new GooString(Docname);
+  char* htmlEncoding;
+  fName->append(".html");
+
+  // getFileNameFromPath() returns a malloc'ed buffer owned by the caller.
+  char* frameFileName = getFileNameFromPath(fName->getCString(),fName->getLength());
+  fContentsFrame = fopen(frameFileName, "w");
+  free(frameFileName);
+  if (!fContentsFrame){
+    // Report first, then release: the message still needs fName.
+    error(-1, "Couldn't open html file '%s'", fName->getCString());
+    delete fName;
+    return;
+  }
+
+  delete fName;
+
+  fName=basename(Docname);
+  fputs(DOCTYPE_FRAMES, fContentsFrame);
+  fputs("\n<HTML>",fContentsFrame);
+  fputs("\n<HEAD>",fContentsFrame);
+  fprintf(fContentsFrame,"\n<TITLE>%s</TITLE>",docTitle->getCString());
+  htmlEncoding = mapEncodingToHtml(globalParams->getTextEncodingName());
+  fprintf(fContentsFrame, "\n<META http-equiv=\"Content-Type\" content=\"text/html; charset=%s\">\n", htmlEncoding);
+  dumpMetaVars(fContentsFrame);
+  fprintf(fContentsFrame, "</HEAD>\n");
+  fputs("<FRAMESET cols=\"100,*\">\n",fContentsFrame);
+  fprintf(fContentsFrame,"<FRAME name=\"links\" src=\"%s_ind.html\">\n",fName->getCString());
+  fputs("<FRAME name=\"contents\" src=",fContentsFrame);
+  if (complexMode)
+      fprintf(fContentsFrame,"\"%s-%d.html\"",fName->getCString(), firstPage);
+  else
+      fprintf(fContentsFrame,"\"%ss.html\"",fName->getCString());
+
+  fputs(">\n</FRAMESET>\n</HTML>\n",fContentsFrame);
+
+  delete fName;
+  fclose(fContentsFrame);
+  // Do not leave a closed stream behind in the member.
+  fContentsFrame = NULL;
+}
+
+// Set up HTML/XML output for one document and open the output files.
+//
+//   fileName  - output base name; files are opened under the name that
+//               getFileNameFromPath() derives from it
+//   title     - document title, written into <TITLE>
+//   author, keywords, subject, date
+//             - optional (may be NULL) values for <META> variables
+//   extension - passed through to HtmlPage
+//   rawOrder  - stored and passed through to HtmlPage
+//   firstPage - passed to doFrame() for the initial contents frame
+//   outline   - whether a link to the document outline is written
+//
+// With frames (noframes false, xml false) the frameset and the index
+// frame are created, plus "<fileName>s.html" in simple mode.  With
+// noframes a single ".html"/".xml" file (or stdout when stout is set) is
+// opened and its header written.  The flag ok is set only when every
+// required file could be opened; on failure the error is reported via
+// error() and the constructor returns early with ok == gFalse.
+HtmlOutputDev::HtmlOutputDev(char *fileName, char *title,
+	char *author, char *keywords, char *subject, char *date,
+	char *extension,
+	GBool rawOrder, int firstPage, GBool outline)
+{
+  char *htmlEncoding;
+  char *openName;	// malloc'ed name from getFileNameFromPath()
+
+  fContentsFrame = NULL;
+  // Not every mode opens page; give it a defined value in all of them.
+  page = NULL;
+  docTitle = new GooString(title);
+  pages = NULL;
+  dumpJPEG=gTrue;
+  this->rawOrder = rawOrder;
+  this->doOutline = outline;
+  ok = gFalse;
+  imgNum=1;
+  needClose = gFalse;
+  pages = new HtmlPage(rawOrder, extension);
+
+  glMetaVars = new GooList();
+  glMetaVars->append(new HtmlMetaVar("generator", "pdftohtml 0.36"));
+  if( author ) glMetaVars->append(new HtmlMetaVar("author", author));
+  if( keywords ) glMetaVars->append(new HtmlMetaVar("keywords", keywords));
+  if( date ) glMetaVars->append(new HtmlMetaVar("date", date));
+  if( subject ) glMetaVars->append(new HtmlMetaVar("subject", subject));
+
+  maxPageWidth = 0;
+  maxPageHeight = 0;
+
+  pages->setDocName(fileName);
+  Docname=new GooString (fileName);
+
+  // for non-xml output (complex or simple) with frames generate the left frame
+  if(!xml && !noframes)
+  {
+     GooString* left=new GooString(fileName);
+     left->append("_ind.html");
+
+     doFrame(firstPage);
+
+     openName = getFileNameFromPath(left->getCString(),left->getLength());
+     fContentsFrame = fopen(openName, "w");
+     free(openName);
+     if (!fContentsFrame)
+     {
+        error(-1, "Couldn't open html file '%s'", left->getCString());
+        delete left;
+        return;
+     }
+     delete left;
+     fputs(DOCTYPE, fContentsFrame);
+     fputs("<HTML>\n<HEAD>\n<TITLE></TITLE>\n</HEAD>\n<BODY>\n",fContentsFrame);
+
+     if (doOutline)
+     {
+        GooString *str = basename(Docname);
+        fprintf(fContentsFrame, "<A href=\"%s%s\" target=\"contents\">Outline</a><br>", str->getCString(), complexMode ? "-outline.html" : "s.html#outline");
+        delete str;
+     }
+
+     if (!complexMode)
+     {	/* not in complex mode */
+
+       GooString* right=new GooString(fileName);
+       right->append("s.html");
+
+       openName = getFileNameFromPath(right->getCString(),right->getLength());
+       page = fopen(openName, "w");
+       free(openName);
+       if (!page){
+        error(-1, "Couldn't open html file '%s'", right->getCString());
+        delete right;
+        return;
+       }
+       delete right;
+       fputs(DOCTYPE, page);
+       fputs("<HTML>\n<HEAD>\n<TITLE></TITLE>\n</HEAD>\n<BODY>\n",page);
+     }
+  }
+
+  if (noframes) {
+    if (stout) page=stdout;
+    else {
+      GooString* right=new GooString(fileName);
+      if (!xml) right->append(".html");
+      if (xml) right->append(".xml");
+      openName = getFileNameFromPath(right->getCString(),right->getLength());
+      page = fopen(openName, "w");
+      free(openName);
+      if (!page){
+	// Report before releasing: the message still needs the name.
+	error(-1, "Couldn't open html file '%s'", right->getCString());
+	delete right;
+	return;
+      }
+      delete right;
+    }
+
+    htmlEncoding = mapEncodingToHtml(globalParams->getTextEncodingName());
+    if (xml)
+    {
+      fprintf(page, "<?xml version=\"1.0\" encoding=\"%s\"?>\n", htmlEncoding);
+      fputs("<!DOCTYPE pdf2xml SYSTEM \"pdf2xml.dtd\">\n\n", page);
+      fputs("<pdf2xml>\n",page);
+    }
+    else
+    {
+      fprintf(page,"%s\n<HTML>\n<HEAD>\n<TITLE>%s</TITLE>\n",
+	      DOCTYPE, docTitle->getCString());
+
+      fprintf(page, "<META http-equiv=\"Content-Type\" content=\"text/html; charset=%s\">\n", htmlEncoding);
+
+      dumpMetaVars(page);
+      fprintf(page,"</HEAD>\n");
+      fprintf(page,"<BODY bgcolor=\"#A0A0A0\" vlink=\"blue\" link=\"blue\">\n");
+    }
+  }
+  ok = gTrue;
+}
+
+// Write the closing markup to the open output streams, close them, and
+// release everything the constructor allocated.
+HtmlOutputDev::~HtmlOutputDev() {
+    HtmlFont::clear();
+
+    delete Docname;
+    delete docTitle;
+
+    deleteGooList(glMetaVars, HtmlMetaVar);
+
+    if (fContentsFrame){
+      fputs("</BODY>\n</HTML>\n",fContentsFrame);
+      fclose(fContentsFrame);
+    }
+
+    // page is NULL when the constructor's fopen() for it failed; there is
+    // nothing to finish or close in that case.
+    if (xml) {
+      if (page) {
+        fputs("</pdf2xml>\n",page);
+        fclose(page);
+      }
+    } else
+    if ( !complexMode || noframes )
+    {
+      // Complex mode with frames writes one file per page instead, so
+      // page is only finished in the other modes.
+      if (page) {
+        fputs("</BODY>\n</HTML>\n",page);
+        fclose(page);
+      }
+    }
+    delete pages;
+}
+
+
+
+// Begin a new page: remember its number, reset the page accumulator, add
+// a "Page N" entry to the index frame when frames are in use and the
+// index file is open, and record the page dimensions from state.
+void HtmlOutputDev::startPage(int pageNum, GfxState *state) {
+  this->pageNum = pageNum;
+  GooString *docBase=basename(Docname);
+  pages->clear();
+
+  if (!noframes && fContentsFrame)
+  {
+    // Complex mode links to a per-page file, simple mode to an anchor
+    // inside the single contents file.
+    if (complexMode)
+      fprintf(fContentsFrame,"<A href=\"%s-%d.html\"",docBase->getCString(),pageNum);
+    else
+      fprintf(fContentsFrame,"<A href=\"%ss.html#%d\"",docBase->getCString(),pageNum);
+    fprintf(fContentsFrame," target=\"contents\" >Page %d</a><br>\n",pageNum);
+  }
+  delete docBase;
+
+  pages->pageWidth=static_cast<int>(state->getPageWidth());
+  pages->pageHeight=static_cast<int>(state->getPageHeight());
+}
+
+
+// Finish the current page: run conv() and coalesce() on the accumulated
+// strings, dump the result to the page stream, and remember this page's
+// size as the maximum page size (the last page wins).  Unless output goes
+// to stdout or error output is quiet, a "Page-N" progress line is printed.
+void HtmlOutputDev::endPage() {
+  pages->conv();
+  pages->coalesce();
+  pages->dump(page, pageNum);
+
+  // Pages of different sizes are not handled for complex output yet, so
+  // simply keep the most recent page's dimensions.
+  maxPageHeight = pages->pageHeight;
+  maxPageWidth = pages->pageWidth;
+
+  if (stout)
+    return;
+  if (globalParams->getErrQuiet())
+    return;
+  printf("Page-%d\n",(pageNum));
+}
+
+// Forward a font change to the page accumulator.
+void HtmlOutputDev::updateFont(GfxState *state) {
+  pages->updateFont(state);
+}
+
+// Forward the start of a text string to the page accumulator.
+void HtmlOutputDev::beginString(GfxState *state, GooString *s) {
+  pages->beginString(state, s);
+}
+
+// Forward the end of a text string to the page accumulator; state is not
+// used.
+void HtmlOutputDev::endString(GfxState *state) {
+  pages->endString();
+}
+
+// Add one character to the current page.  Characters whose render mode
+// has both low bits set ((getRender() & 3) == 3) are dropped unless
+// showHidden is set.  The character code itself is not used; only its
+// Unicode mapping (u, uLen) is passed on.
+void HtmlOutputDev::drawChar(GfxState *state, double x, double y,
+	      double dx, double dy,
+	      double originX, double originY,
+	      CharCode code, Unicode *u, int uLen)
+{
+  if (showHidden || (state->getRender() & 3) != 3) {
+    pages->addChar(state, x, y, dx, dy, originX, originY, u, uLen);
+  }
+}
+
+// Handle an image mask.  In simple mode (neither ignore nor complexMode
+// set) a DCT-encoded stream is copied verbatim to
+// "<Docname>-<page>_<n>.jpg" when dumpJPEG is set; everything else is
+// passed to OutputDev::drawImageMask().  The image counter imgNum is
+// advanced for every JPEG attempted, even if the file cannot be opened.
+void HtmlOutputDev::drawImageMask(GfxState *state, Object *ref, Stream *str,
+			      int width, int height, GBool invert,
+			      GBool inlineImg) {
+
+  if (ignore || complexMode || !dumpJPEG || str->getKind() != strDCT) {
+    OutputDev::drawImageMask(state, ref, str, width, height, invert, inlineImg);
+    return;
+  }
+
+  // Build the image file name; the number strings are only needed for
+  // that, so release them right away.
+  GooString *fName=new GooString(Docname);
+  fName->append("-");
+  GooString *pgNum=GooString::fromInt(pageNum);
+  GooString *imgnum=GooString::fromInt(imgNum);
+  fName->append(pgNum)->append("_")->append(imgnum)->append(".jpg");
+  delete pgNum;
+  delete imgnum;
+  ++imgNum;
+
+  // getFileNameFromPath() returns a malloc'ed buffer owned by the caller.
+  char *imgFileName = getFileNameFromPath(fName->getCString(),fName->getLength());
+  FILE *f1 = fopen(imgFileName, "wb");
+  free(imgFileName);
+  if (!f1) {
+    error(-1, "Couldn't open image file '%s'", fName->getCString());
+    delete fName;
+    return;
+  }
+  delete fName;
+
+  // Copy the still-encoded JPEG data straight from the raw stream.
+  str = ((DCTStream *)str)->getRawStream();
+  str->reset();
+
+  int c;
+  while ((c = str->getChar()) != EOF)
+    fputc(c, f1);
+
+  fclose(f1);
+}
+
+// Handle an image.  In simple mode (neither ignore nor complexMode set) a
+// DCT-encoded stream is copied verbatim to "<Docname>-<page>_<n>.jpg"
+// when dumpJPEG is set; everything else is passed to
+// OutputDev::drawImage().  The image counter imgNum is advanced for every
+// JPEG attempted, even if the file cannot be opened.
+void HtmlOutputDev::drawImage(GfxState *state, Object *ref, Stream *str,
+			  int width, int height, GfxImageColorMap *colorMap,
+			  int *maskColors, GBool inlineImg) {
+
+  if (ignore || complexMode || !dumpJPEG || str->getKind() != strDCT) {
+    OutputDev::drawImage(state, ref, str, width, height, colorMap,
+			 maskColors, inlineImg);
+    return;
+  }
+
+  // Build the image file name; the number strings are only needed for
+  // that, so release them right away.
+  GooString *fName=new GooString(Docname);
+  fName->append("-");
+  GooString *pgNum=GooString::fromInt(pageNum);
+  GooString *imgnum=GooString::fromInt(imgNum);
+  fName->append(pgNum)->append("_")->append(imgnum)->append(".jpg");
+  delete pgNum;
+  delete imgnum;
+  ++imgNum;
+
+  // getFileNameFromPath() returns a malloc'ed buffer owned by the caller.
+  char *imgFileName = getFileNameFromPath(fName->getCString(),fName->getLength());
+  FILE *f1 = fopen(imgFileName, "wb");
+  free(imgFileName);
+  if (!f1) {
+    error(-1, "Couldn't open image file '%s'", fName->getCString());
+    delete fName;
+    return;
+  }
+  delete fName;
+
+  // Copy the still-encoded JPEG data straight from the raw stream.
+  str = ((DCTStream *)str)->getRawStream();
+  str->reset();
+
+  int c;
+  while ((c = str->getChar()) != EOF)
+    fputc(c, f1);
+
+  fclose(f1);
+}
+
+
+
+// Register a link on the current page: convert its rectangle from user
+// to device coordinates, resolve its destination with getLinkDest(), and
+// hand an HtmlLink for it to the page.
+void HtmlOutputDev::drawLink(Link* link,Catalog *cat){
+  double userX1, userY1, userX2, userY2;
+  double borderWidth;
+  int devX1, devY1, devX2, devY2;
+
+  link->getRect(&userX1, &userY1, &userX2, &userY2);
+  borderWidth = link->getBorderStyle()->getWidth();	// queried but not used
+  cvtUserToDev(userX1, userY1, &devX1, &devY1);
+  cvtUserToDev(userX2, userY2, &devX2, &devY2);
+
+  GooString* target = getLinkDest(link, cat);
+  // Note the swapped y arguments: the second corner's y comes first.
+  HtmlLink htmlLink((double) devX1, (double) devY2,
+		    (double) devX2, (double) devY1, target);
+  pages->AddLink(htmlLink);
+  delete target;
+}
+
+// Replace a trailing ".pdf" or ".PDF" in file with ".html".  Names shorter
+// than the extension are left alone (and never read out of bounds).
+static void pdfExtensionToHtml(GooString *file)
+{
+  const int extLen = 4;
+
+  if (file->getLength() < extLen)
+    return;
+  char *ext = file->getCString() + file->getLength() - extLen;
+  if (!strcmp(ext, ".pdf") || !strcmp(ext, ".PDF")) {
+    file->del(file->getLength() - extLen, extLen);
+    file->append(".html");
+  }
+}
+
+// Compute the href target for a PDF link.  Always returns a newly
+// allocated string that the caller must delete; it is empty when the link
+// kind is not handled or no destination can be determined.
+//
+//   actionGoTo   - page inside this document, rendered according to the
+//                  output mode (see the table below)
+//   actionGoToR  - file name of the remote document; with printHtml a
+//                  ".pdf" suffix becomes ".html" and "#<page>" is added
+//   actionURI    - the URI itself
+//   actionLaunch - with printHtml, the file name with ".pdf" -> ".html";
+//                  otherwise empty
+GooString* HtmlOutputDev::getLinkDest(Link *link,Catalog* catalog){
+  switch(link->getAction()->getKind())
+  {
+      case actionGoTo:
+	  {
+	  LinkGoTo *ha=(LinkGoTo *)link->getAction();
+	  LinkDest *dest=NULL;
+	  if (ha->getDest()==NULL)
+	      dest=catalog->findDest(ha->getNamedDest());
+	  else
+	      dest=ha->getDest()->copy();
+	  if (!dest)
+	      return new GooString();
+
+	  int page=1;
+	  if (dest->isPageRef()){
+	      Ref pageref=dest->getPageRef();
+	      page=catalog->findPage(pageref.num,pageref.gen);
+	  }
+	  else {
+	      page=dest->getPageNum();
+	  }
+	  delete dest;
+
+	  // Only build the base name once a destination is known, so
+	  // nothing is left allocated on the early return above.
+	  GooString* file=basename(Docname);
+	  GooString *str=GooString::fromInt(page);
+	  /* 		complex 	simple
+	   	frames		file-4.html	files.html#4
+		noframes	file.html#4	file.html#4
+	   */
+	  if (noframes)
+	  {
+	      file->append(".html#");
+	      file->append(str);
+	  }
+	  else if (complexMode)
+	  {
+	      file->append("-");
+	      file->append(str);
+	      file->append(".html");
+	  }
+	  else
+	  {
+	      file->append("s.html#");
+	      file->append(str);
+	  }
+
+	  if (printCommands) printf(" link to page %d ",page);
+	  delete str;
+	  return file;
+	  }
+      case actionGoToR:
+	  {
+	  LinkGoToR *ha=(LinkGoToR *) link->getAction();
+	  GooString *file;
+	  if (ha->getFileName())
+	      file=new GooString(ha->getFileName()->getCString());
+	  else
+	      file=new GooString();
+
+	  LinkDest *dest=NULL;
+	  if (ha->getDest()!=NULL)  dest=ha->getDest()->copy();
+	  if (dest){
+	      int page=1;
+	      if (!(dest->isPageRef()))  page=dest->getPageNum();
+	      delete dest;
+
+	      if (printCommands) printf(" link to page %d ",page);
+	      if (printHtml){
+		  pdfExtensionToHtml(file);
+		  file->append('#');
+		  // fromInt() allocates; append() copies, so free the temporary.
+		  GooString *pageStr=GooString::fromInt(page);
+		  file->append(pageStr);
+		  delete pageStr;
+	      }
+	  }
+	  if (printCommands) printf("filename %s\n",file->getCString());
+	  return file;
+	  }
+      case actionURI:
+	  {
+	  LinkURI *ha=(LinkURI *) link->getAction();
+	  GooString* file=new GooString(ha->getURI()->getCString());
+	  return file;
+	  }
+      case actionLaunch:
+	  {
+	  if (printHtml) {
+	      LinkLaunch *ha=(LinkLaunch *) link->getAction();
+	      GooString* file=new GooString(ha->getFileName()->getCString());
+	      pdfExtensionToHtml(file);
+	      if (printCommands) printf("filename %s",file->getCString());
+	      return file;
+	  }
+	  // Without printHtml a launch action yields no destination.
+	  return new GooString();
+	  }
+      default:
+	  return new GooString();
+  }
+}
+
+// Write every collected META variable to file, one tag per line.
+void HtmlOutputDev::dumpMetaVars(FILE *file)
+{
+  int idx = 0;
+  while (idx < glMetaVars->getLength())
+  {
+     HtmlMetaVar *metaVar = (HtmlMetaVar*)glMetaVars->get(idx);
+     // toString() returns a new string owned by the caller.
+     GooString *tag = metaVar->toString();
+     fprintf(file, "%s\n", tag->getCString());
+     delete tag;
+     ++idx;
+  }
+}
+
+// Write the document outline, if the catalog has one.
+//
+// Nothing is written (and gFalse is returned) when the device is not ok,
+// when producing XML, when the catalog's outline is not a dictionary, or
+// when the separate outline file cannot be opened.  In simple mode and in
+// complex mode without frames the outline goes to the page stream; in
+// complex mode with frames it goes to its own "<basename>-outline.html".
+//
+// Returns the result of newOutlineLevel() for the outline root.
+GBool HtmlOutputDev::dumpDocOutline(Catalog* catalog)
+{
+  if (!ok || xml)
+    return gFalse;
+
+  Object *outlines = catalog->getOutline();
+  if (!outlines->isDict())
+    return gFalse;
+
+  // xml is known to be false from here on.
+  FILE *output = NULL;
+  GBool bClose = gFalse;
+
+  if (!complexMode)
+  {
+    output = page;
+  }
+  else if (noframes)
+  {
+    output = page;
+    fputs("<hr>\n", output);
+  }
+  else
+  {
+    GooString *str = basename(Docname);
+    str->append("-outline.html");
+    // getFileNameFromPath() returns a malloc'ed buffer owned by the caller.
+    char *outlineName = getFileNameFromPath(str->getCString(),str->getLength());
+    output = fopen(outlineName, "w");
+    free(outlineName);
+    // Release the name on both the success and the failure path.
+    delete str;
+    if (output == NULL)
+      return gFalse;
+    bClose = gTrue;
+    fputs("<HTML>\n<HEAD>\n<TITLE>Document Outline</TITLE>\n</HEAD>\n<BODY>\n", output);
+  }
+
+  GBool done = newOutlineLevel(output, outlines, catalog);
+  if (done && !complexMode)
+    fputs("<hr>\n", output);
+
+  if (bClose)
+  {
+    fputs("</BODY>\n</HTML>\n", output);
+    fclose(output);
+  }
+  return done;
+}
+
+// Write one level of the document outline as an HTML <ul> list and
+// recurse into each item's children.
+//
+//   output  - stream the list is written to
+//   node    - outline dictionary whose "First" entry starts this level
+//   catalog - used to resolve named destinations and page references
+//   level   - nesting depth; at level 1 an "outline" anchor and a heading
+//             are written before the list
+//
+// Returns gTrue if at least one item was written at this level.  The walk
+// over the "Next" chain stops at the first item without a "Title".
+GBool HtmlOutputDev::newOutlineLevel(FILE *output, Object *node, Catalog* catalog, int level)
+{
+  Object curr, next;
+  GBool atLeastOne = gFalse;
+  
+  if (node->dictLookup("First", &curr)->isDict()) {
+    if (level == 1)
+	{
+		fputs("<A name=\"outline\"></a>", output);
+		fputs("<h1>Document Outline</h1>\n", output);
+	}
+    fputs("<ul>",output);
+    do {
+      // get title, give up if not found
+      // NOTE(review): only a null Title is rejected; getString() below
+      // presumably requires a string object - confirm non-string titles
+      // cannot reach it.
+      Object title;
+      if (curr.dictLookup("Title", &title)->isNull()) {
+		title.free();
+		break;
+      }
+      GooString *titleStr = new GooString(title.getString());
+      title.free();
+
+      // get corresponding link
+      // Note: some code duplicated from HtmlOutputDev::getLinkDest().
+      GooString *linkName = NULL;;
+      Object dest;
+      if (!curr.dictLookup("Dest", &dest)->isNull()) {
+		LinkGoTo *link = new LinkGoTo(&dest);
+		LinkDest *linkdest=NULL;
+		if (link->getDest()==NULL) 
+	  		linkdest=catalog->findDest(link->getNamedDest());
+		else 
+	  		linkdest=link->getDest()->copy();
+		delete link;
+		if (linkdest) { 
+	  		int page;
+	  		if (linkdest->isPageRef()) {
+	    		Ref pageref=linkdest->getPageRef();
+	    		page=catalog->findPage(pageref.num,pageref.gen);
+	  		} else {
+	    		page=linkdest->getPageNum();
+	  		}
+	  		delete linkdest;
+
+			/* 			complex 	simple
+			frames		file-4.html	files.html#4
+			noframes	file.html#4	file.html#4
+	   		*/
+	  		linkName=basename(Docname);
+	  		GooString *str=GooString::fromInt(page);
+	  		if (noframes) {
+	    		linkName->append(".html#");
+				linkName->append(str);
+	  		} else {
+    			if( complexMode ) {
+	   		   		linkName->append("-");
+	      			linkName->append(str);
+	      			linkName->append(".html");
+	    		} else {
+	      			linkName->append("s.html#");
+	      			linkName->append(str);
+	    		}
+	  		}
+			delete str;
+		}
+      }
+      dest.free();
+
+      // The title is written as-is, without HTML escaping.
+      fputs("<li>",output);
+      if (linkName)
+		fprintf(output,"<A href=\"%s\">", linkName->getCString());
+      fputs(titleStr->getCString(),output);
+      if (linkName) {
+		fputs("</A>",output);
+		delete linkName;
+      }
+      fputs("\n",output);
+      delete titleStr;
+      atLeastOne = gTrue;
+
+      // Children first (their return value is not needed here), then move
+      // on to the sibling stored under "Next".
+      newOutlineLevel(output, &curr, catalog, level+1);
+      curr.dictLookup("Next", &next);
+      curr.free();
+      curr = next;
+    } while(curr.isDict());
+    fputs("</ul>",output);
+  }
+  curr.free();
+
+  return atLeastOne;
+}
+
+// Return the part of the path c that follows its last '/', or the whole
+// path when it contains no '/'.
+//
+//   c      - path to examine
+//   strlen - number of characters of c to consider
+//
+// The result is a NUL-terminated copy allocated with malloc(); the caller
+// owns it and must free() it.  Returns NULL if the allocation fails.
+char* getFileNameFromPath(char* c, int strlen) {
+  int start = 0;	/* index of the first character after the last '/' */
+  int len;
+  int i;
+  char* res;
+
+  if (strlen < 0)
+    strlen = 0;
+
+  /* Tracking "one past the slash" also handles a slash at index 0, which
+     a plain "last slash index, 0 means none" scheme cannot tell apart
+     from "no slash at all". */
+  for (i=0;i<strlen;i++) {
+    if (c[i]=='/')
+      start = i + 1;
+  }
+
+  len = strlen - start;
+  res = (char *)malloc((size_t)len + 1);
+  if (!res)
+    return NULL;
+  /* Copy exactly the characters counted above rather than relying on
+     where the source happens to be terminated. */
+  memcpy(res, c + start, (size_t)len);
+  res[len] = '\0';
+  return res;
+}
diff --git a/utils/HtmlOutputDev.h b/utils/HtmlOutputDev.h
new file mode 100644
index 00000000..5196ee23
--- /dev/null
+++ b/utils/HtmlOutputDev.h
@@ -0,0 +1,302 @@
+//========================================================================
+//
+// HtmlOutputDev.h
+//
+// Copyright 1997 Derek B. Noonburg
+//
+// Changed 1999 by G.Ovtcharov
+//========================================================================
+
+#ifndef HTMLOUTPUTDEV_H
+#define HTMLOUTPUTDEV_H
+
+#ifdef __GNUC__
+#pragma interface
+#endif
+
+#include <stdio.h>
+#include "goo/gtypes.h"
+#include "goo/GooList.h"
+#include "GfxFont.h"
+#include "OutputDev.h"
+#include "HtmlLinks.h"
+#include "HtmlFonts.h"
+#include "Link.h"
+#include "Catalog.h"
+#include "UnicodeMap.h"
+
+
+#ifdef WIN32
+#  define SLASH '\\'
+#else
+#  define SLASH '/'
+#endif
+
+// Round to int by adding 0.5 and truncating (exact for non-negative values).
+#define xoutRound(x) ((int)((x) + 0.5))
+
+#define DOCTYPE "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\">"
+#define DOCTYPE_FRAMES "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Frameset//EN\"\n\"http://www.w3.org/TR/html4/frameset.dtd\">"
+
+class GfxState;
+class GooString;
+//------------------------------------------------------------------------
+// HtmlString
+//------------------------------------------------------------------------
+
+enum UnicodeTextDirection {	// type of HtmlString::dir (text direction)
+  textDirUnknown,
+  textDirLeftRight,
+  textDirRightLeft,
+  textDirTopBottom
+};
+
+
+class HtmlString {	// a text string plus the per-string data HtmlPage keeps for it
+public:
+
+  // Constructor.
+  HtmlString(GfxState *state, double fontSize, HtmlFontAccu* fonts);
+
+  // Destructor.
+  ~HtmlString();
+
+  // Add a character to the string.
+  void addChar(GfxState *state, double x, double y,
+	       double dx, double dy,
+	       Unicode u); 
+  HtmlLink* getLink() { return link; }	// returns the private <link> member
+  void endString(); // postprocessing
+
+private:
+// (translated from German) "change the text variable"
+  HtmlLink *link;
+  double xMin, xMax;		// bounding box x coordinates
+  double yMin, yMax;		// bounding box y coordinates
+  int col;			// starting column
+  Unicode *text;		// the text
+  double *xRight;		// right-hand x coord of each char
+  HtmlString *yxNext;		// next string in y-major order
+  HtmlString *xyNext;		// next string in x-major order
+  int fontpos;			// index handed to HtmlFontAccu::Get() by HtmlPage::getFont()
+  GooString* htext;		// NOTE(review): presumably the HTML form of <text> -- confirm in HtmlOutputDev.cc
+  int len;			// length of text and xRight
+  int size;			// size of text and xRight arrays
+  UnicodeTextDirection dir;	// direction (left to right/right to left)
+  
+  friend class HtmlPage;
+
+};
+
+
+//------------------------------------------------------------------------
+// HtmlPage
+//------------------------------------------------------------------------
+
+
+
+class HtmlPage {	// the strings, fonts and links collected for one page
+public:
+
+  // Constructor.
+  HtmlPage(GBool rawOrder, char *imgExtVal);
+
+  // Destructor.
+  ~HtmlPage();
+
+  // Begin a new string.
+  void beginString(GfxState *state, GooString *s);
+
+  // Add a character to the current string.
+  void addChar(GfxState *state, double x, double y,
+	       double dx, double dy, 
+		double ox, double oy, 
+		Unicode *u, int uLen); //Guchar c);
+
+  void updateFont(GfxState *state);
+
+  // End the current string, sorting it into the list of strings.
+  void endString();
+
+  // Coalesce strings that look like parts of the same line.
+  void coalesce();
+
+  // Find a string.  If <top> is true, starts looking at top of page;
+  // otherwise starts looking at <xMin>,<yMin>.  If <bottom> is true,
+  // stops looking at bottom of page; otherwise stops looking at
+  // <xMax>,<yMax>.  If found, sets the text bounding rectangle and
+  // returns true; otherwise returns false.
+  // NOTE(review): no declaration follows the comment above in this header.
+
+  // new functions
+  void AddLink(const HtmlLink& x){	// forwards <x> to the <links> collection
+    links->AddLink(x);
+  }
+
+ void dump(FILE *f, int pageNum);
+
+  // Clear the page.
+  void clear();
+  
+  void conv();
+private:
+  HtmlFont* getFont(HtmlString *hStr) { return fonts->Get(hStr->fontpos); }	// font entry for <hStr>, looked up by its fontpos
+
+  double fontSize;		// current font size
+  GBool rawOrder;		// keep strings in content stream order
+
+  HtmlString *curStr;		// currently active string
+
+  HtmlString *yxStrings;	// strings in y-major order
+  HtmlString *xyStrings;	// strings in x-major order
+  HtmlString *yxCur1, *yxCur2;	// cursors for yxStrings list
+  
+  void setDocName(char* fname);
+  void dumpAsXML(FILE* f,int page);
+  void dumpComplex(FILE* f, int page);
+
+  // marks the position of the fonts that belong to current page (for noframes)
+  int fontsPageMarker; 
+  HtmlFontAccu *fonts;		// font table queried by getFont()
+  HtmlLinks *links; 		// link collection filled by AddLink()
+  
+  GooString *DocName;
+  GooString *imgExt;
+  int pageWidth;
+  int pageHeight;
+  static int pgNum;
+  int firstPage;                // used to begin the numeration of pages
+
+  friend class HtmlOutputDev;
+};
+
+//------------------------------------------------------------------------
+// HtmlMetaVar
+//------------------------------------------------------------------------
+class HtmlMetaVar {	// a (name, content) pair; HtmlOutputDev keeps these in glMetaVars
+public:
+    HtmlMetaVar(char *_name, char *_content);
+    ~HtmlMetaVar();    
+    
+    GooString* toString();	// NOTE(review): presumably renders the pair as an HTML <META> tag -- confirm in HtmlOutputDev.cc
+
+private:
+
+    GooString *name;
+    GooString *content;
+};
+
+//------------------------------------------------------------------------
+// HtmlOutputDev
+//------------------------------------------------------------------------
+
+class HtmlOutputDev: public OutputDev {
+public:
+
+  // Open a text output file.  If <fileName> is NULL, no file is written
+  // (this is useful, e.g., for searching text).  If <rawOrder> is true,
+  // the text is kept in content stream order.
+  // NOTE(review): the original comment also described a <useASCII7>
+  // flag (7-bit ASCII vs. 8-bit ISO Latin-1 output, EUC-JP text); no
+  // such parameter exists in the signature below.
+  HtmlOutputDev(char *fileName, char *title, 
+	  char *author,
+	  char *keywords,
+	  char *subject,
+	  char *date,
+	  char *extension,
+	  GBool rawOrder,
+	  int firstPage = 1,
+	  GBool outline = 0);
+
+  // Destructor.
+  virtual ~HtmlOutputDev();
+
+  // Check if file was successfully created.
+  virtual GBool isOk() { return ok; }
+
+  //---- get info about output device
+
+  // Does this device use upside-down coordinates?
+  // (Upside-down means (0,0) is the top left corner of the page.)
+  virtual GBool upsideDown() { return gTrue; }
+
+  // Does this device use drawChar() or drawString()?
+  virtual GBool useDrawChar() { return gTrue; }
+
+  // Does this device use beginType3Char/endType3Char?  Otherwise,
+  // text in Type 3 fonts will be drawn with drawChar/drawString.
+  virtual GBool interpretType3Chars() { return gFalse; }
+
+  // Does this device need non-text content?
+  virtual GBool needNonText() { return gFalse; }
+
+  //----- initialization and control
+
+  // Start a page.
+  virtual void startPage(int pageNum, GfxState *state);
+
+  // End a page.
+  virtual void endPage();
+
+  //----- update text state
+  virtual void updateFont(GfxState *state);
+
+  //----- text drawing
+  virtual void beginString(GfxState *state, GooString *s);
+  virtual void endString(GfxState *state);
+  virtual void drawChar(GfxState *state, double x, double y,
+			double dx, double dy,
+			double originX, double originY,
+			CharCode code, Unicode *u, int uLen);
+  
+  virtual void drawImageMask(GfxState *state, Object *ref, 
+			     Stream *str,
+			     int width, int height, GBool invert,
+			     GBool inlineImg);
+  virtual void drawImage(GfxState *state, Object *ref, Stream *str,
+			  int width, int height, GfxImageColorMap *colorMap,
+			 int *maskColors, GBool inlineImg);
+
+  //new feature    
+  virtual int DevType() {return 1234;}	// NOTE(review): magic device id; its consumer is not visible here
+  virtual void drawLink(Link *link,Catalog *cat); 
+
+  int getPageWidth() { return maxPageWidth; }
+  int getPageHeight() { return maxPageHeight; }
+
+  GBool dumpDocOutline(Catalog* catalog);
+
+  /*  char* getFileNameFromPath(char* c, int strlen); */
+
+private:
+  // convert encoding into a HTML standard, or encoding->getCString if not
+  // recognized
+  static char* mapEncodingToHtml(GooString* encoding);
+  GooString* getLinkDest(Link *link,Catalog *catalog);
+  void dumpMetaVars(FILE *);
+  void doFrame(int firstPage);
+  GBool newOutlineLevel(FILE *output, Object *node, Catalog* catalog, int level = 1);	// writes one outline level as a <ul> list and recurses with level+1
+
+  FILE *fContentsFrame;
+  FILE *page;                   // html file
+  //FILE *tin;                    // image log file
+  //GBool write;
+  GBool needClose;		// need to close the file?
+  HtmlPage *pages;		// text for the current page
+  GBool rawOrder;		// keep text in content stream order
+  GBool doOutline;		// output document outline
+  GBool ok;			// set up ok?
+  GBool dumpJPEG;
+  int pageNum;
+  int maxPageWidth;		// value returned by getPageWidth()
+  int maxPageHeight;		// value returned by getPageHeight()
+  static int imgNum;
+  GooString *Docname;
+  GooString *docTitle;
+  GooList *glMetaVars;
+  friend class HtmlPage;
+};
+
+char* getFileNameFromPath(char* c, int strlen);
+
+#endif
diff --git a/utils/ImageOutputDev.cc b/utils/ImageOutputDev.cc
new file mode 100644
index 00000000..9789a748
--- /dev/null
+++ b/utils/ImageOutputDev.cc
@@ -0,0 +1,195 @@
+//========================================================================
+//
+// ImageOutputDev.cc
+//
+// Copyright 1998-2003 Glyph & Cog, LLC
+//
+//========================================================================
+
+#include <poppler-config.h>
+
+#ifdef USE_GCC_PRAGMAS
+#pragma implementation
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <ctype.h>
+#include "goo/gmem.h"
+#include "config.h"
+#include "Error.h"
+#include "GfxState.h"
+#include "Object.h"
+#include "Stream.h"
+#include "DCTStream.h"
+#include "ImageOutputDev.h"
+
+// Set up the image dumper.  <fileRootA> is copied and becomes the
+// prefix of every output file name; <dumpJPEGA> is stored in dumpJPEG.
+ImageOutputDev::ImageOutputDev(char *fileRootA, GBool dumpJPEGA) {
+  ok = gTrue;
+  imgNum = 0;
+  dumpJPEG = dumpJPEGA;
+
+  fileRoot = copyString(fileRootA);
+  // scratch buffer for <fileRoot> plus the "-NNN.ext" suffix that the
+  // draw methods sprintf() into it
+  fileName = (char *)gmalloc(strlen(fileRoot) + 20);
+}
+
+// Release the two strings allocated by the constructor.
+ImageOutputDev::~ImageOutputDev() {
+  gfree(fileRoot);
+  gfree(fileName);
+}
+
+// Write one image mask to its own file: the raw JPEG data (.jpg) when
+// dumpJPEG is set and <str> is a non-inline DCT stream, otherwise a PBM
+// (.pbm) holding height * ceil(width / 8) bytes copied from <str>.
+// An image number is consumed even if the file cannot be opened.
+// <state>, <ref> and <invert> are not used.
+void ImageOutputDev::drawImageMask(GfxState *state, Object *ref, Stream *str,
+				   int width, int height, GBool invert,
+				   GBool inlineImg) {
+  FILE *out;
+  Stream *raw;
+  int ch;
+  int nBytes, k;
+  GBool asJPEG;
+
+  asJPEG = dumpJPEG && str->getKind() == strDCT && !inlineImg;
+
+  // pick the file name and claim the image number
+  if (asJPEG) {
+    sprintf(fileName, "%s-%03d.jpg", fileRoot, imgNum);
+  } else {
+    sprintf(fileName, "%s-%03d.pbm", fileRoot, imgNum);
+  }
+  ++imgNum;
+
+  out = fopen(fileName, "wb");
+  if (!out) {
+    error(-1, "Couldn't open image file '%s'", fileName);
+    return;
+  }
+
+  if (asJPEG) {
+    // copy the undecoded stream underneath the DCT decoder
+    raw = ((DCTStream *)str)->getRawStream();
+    raw->reset();
+    for (ch = raw->getChar(); ch != EOF; ch = raw->getChar()) {
+      fputc(ch, out);
+    }
+    raw->close();
+  } else {
+    // PBM header, then the packed rows
+    fprintf(out, "P4\n");
+    fprintf(out, "%d %d\n", width, height);
+    str->reset();
+    nBytes = height * ((width + 7) / 8);
+    k = 0;
+    while (k < nBytes) {
+      fputc(str->getChar(), out);
+      ++k;
+    }
+    str->close();
+  }
+
+  fclose(out);
+}
+
+// Write one image to its own file:
+//   - the raw JPEG data (.jpg) when dumpJPEG is set and <str> is a
+//     non-inline DCT stream with 3 pixel components;
+//   - a PBM (.pbm) for 1-component, 1-bit images (bytes are inverted);
+//   - a PPM (.ppm) with 8-bit RGB samples otherwise.
+// An image number is consumed even if the file cannot be opened.
+// <state>, <ref> and <maskColors> are not used.
+void ImageOutputDev::drawImage(GfxState *state, Object *ref, Stream *str,
+			       int width, int height,
+			       GfxImageColorMap *colorMap,
+			       int *maskColors, GBool inlineImg) {
+  FILE *f;
+  ImageStream *imgStr;
+  Guchar *p;
+  GfxRGB rgb;
+  int x, y;
+  int c;
+  int size, i;
+
+  // dump JPEG file
+  if (dumpJPEG && str->getKind() == strDCT &&
+      colorMap->getNumPixelComps() == 3 &&
+      !inlineImg) {
+
+    // open the image file
+    sprintf(fileName, "%s-%03d.jpg", fileRoot, imgNum);
+    ++imgNum;
+    if (!(f = fopen(fileName, "wb"))) {
+      error(-1, "Couldn't open image file '%s'", fileName);
+      return;
+    }
+
+    // initialize stream: copy the undecoded data under the DCT decoder
+    str = ((DCTStream *)str)->getRawStream();
+    str->reset();
+
+    // copy the stream
+    while ((c = str->getChar()) != EOF)
+      fputc(c, f);
+
+    str->close();
+    fclose(f);
+
+  // dump PBM file
+  } else if (colorMap->getNumPixelComps() == 1 &&
+	     colorMap->getBits() == 1) {
+
+    // open the image file and write the PBM header
+    sprintf(fileName, "%s-%03d.pbm", fileRoot, imgNum);
+    ++imgNum;
+    if (!(f = fopen(fileName, "wb"))) {
+      error(-1, "Couldn't open image file '%s'", fileName);
+      return;
+    }
+    fprintf(f, "P4\n");
+    fprintf(f, "%d %d\n", width, height);
+
+    // initialize stream
+    str->reset();
+
+    // copy the stream, flipping every bit
+    size = height * ((width + 7) / 8);
+    for (i = 0; i < size; ++i) {
+      fputc(str->getChar() ^ 0xff, f);
+    }
+
+    str->close();
+    fclose(f);
+
+  // dump PPM file
+  } else {
+
+    // open the image file and write the PPM header
+    sprintf(fileName, "%s-%03d.ppm", fileRoot, imgNum);
+    ++imgNum;
+    if (!(f = fopen(fileName, "wb"))) {
+      error(-1, "Couldn't open image file '%s'", fileName);
+      return;
+    }
+    fprintf(f, "P6\n");
+    fprintf(f, "%d %d\n", width, height);
+    fprintf(f, "255\n");
+
+    // initialize stream
+    imgStr = new ImageStream(str, width, colorMap->getNumPixelComps(),
+			     colorMap->getBits());
+    imgStr->reset();
+
+    // for each line...
+    for (y = 0; y < height; ++y) {
+
+      p = imgStr->getLine();
+      if (!p) {
+	// no pixel data for this row: write black pixels so the file
+	// still has the size announced in its header, instead of
+	// dereferencing a null line pointer
+	for (x = 0; x < width; ++x) {
+	  fputc(0, f);
+	  fputc(0, f);
+	  fputc(0, f);
+	}
+	continue;
+      }
+
+      // write the line
+      for (x = 0; x < width; ++x) {
+	colorMap->getRGB(p, &rgb);
+	fputc((int)(rgb.r * 255 + 0.5), f);
+	fputc((int)(rgb.g * 255 + 0.5), f);
+	fputc((int)(rgb.b * 255 + 0.5), f);
+	p += colorMap->getNumPixelComps();
+      }
+    }
+    delete imgStr;
+
+    fclose(f);
+  }
+}
diff --git a/utils/ImageOutputDev.h b/utils/ImageOutputDev.h
new file mode 100644
index 00000000..404e2f8c
--- /dev/null
+++ b/utils/ImageOutputDev.h
@@ -0,0 +1,76 @@
+//========================================================================
+//
+// ImageOutputDev.h
+//
+// Copyright 1998-2003 Glyph & Cog, LLC
+//
+//========================================================================
+
+#ifndef IMAGEOUTPUTDEV_H
+#define IMAGEOUTPUTDEV_H
+
+#include <poppler-config.h>
+
+#ifdef USE_GCC_PRAGMAS
+#pragma interface
+#endif
+
+#include <stdio.h>
+#include "goo/gtypes.h"
+#include "OutputDev.h"
+
+class GfxState;
+
+//------------------------------------------------------------------------
+// ImageOutputDev
+//------------------------------------------------------------------------
+
+class ImageOutputDev: public OutputDev {
+public:
+
+  // Create an OutputDev which will write images to files named
+  // <fileRoot>-NNN.<type>.  Normally, all images are written as PBM
+  // (.pbm) or PPM (.ppm) files.  If <dumpJPEG> is set, JPEG images are
+  // written as JPEG (.jpg) files.
+  ImageOutputDev(char *fileRootA, GBool dumpJPEGA);
+
+  // Destructor.
+  virtual ~ImageOutputDev();
+
+  // Check if file was successfully created.  (The constructor sets
+  // <ok> to gTrue and nothing in ImageOutputDev.cc clears it.)
+  virtual GBool isOk() { return ok; }
+
+  // Does this device use beginType3Char/endType3Char?  Otherwise,
+  // text in Type 3 fonts will be drawn with drawChar/drawString.
+  virtual GBool interpretType3Chars() { return gFalse; }
+
+  // Does this device need non-text content?
+  virtual GBool needNonText() { return gFalse; }
+
+  //---- get info about output device
+
+  // Does this device use upside-down coordinates?
+  // (Upside-down means (0,0) is the top left corner of the page.)
+  virtual GBool upsideDown() { return gTrue; }
+
+  // Does this device use drawChar() or drawString()?
+  virtual GBool useDrawChar() { return gFalse; }
+
+  //----- image drawing
+  virtual void drawImageMask(GfxState *state, Object *ref, Stream *str,
+			     int width, int height, GBool invert,
+			     GBool inlineImg);
+  virtual void drawImage(GfxState *state, Object *ref, Stream *str,
+			 int width, int height, GfxImageColorMap *colorMap,
+			 int *maskColors, GBool inlineImg);
+
+private:
+
+  char *fileRoot;		// root of output file names (copy owned by this object)
+  char *fileName;		// buffer for output file names (strlen(fileRoot) + 20 bytes)
+  GBool dumpJPEG;		// set to dump native JPEG files
+  int imgNum;			// current image number
+  GBool ok;			// set up ok?
+};
+
+#endif
diff --git a/utils/Makefile.am b/utils/Makefile.am
new file mode 100644
index 00000000..9ddef40d
--- /dev/null
+++ b/utils/Makefile.am
@@ -0,0 +1,18 @@
+INCLUDES =					\
+	-I$(top_srcdir)				\
+	-I$(top_srcdir)/poppler
+
+LDADD =						\
+	$(top_builddir)/poppler/libpoppler.la
+
+bin_PROGRAMS = pdffonts pdfimages pdfinfo pdftops pdftotext pdftohtml
+
+man1_MANS = pdffonts.1 pdfimages.1 pdfinfo.1 pdftops.1 pdftotext.1 pdftohtml.1
+
+pdffonts_SOURCES	= pdffonts.cc parseargs.c
+pdfimages_SOURCES	= pdfimages.cc ImageOutputDev.cc parseargs.c
+pdfinfo_SOURCES		= pdfinfo.cc parseargs.c
+pdftops_SOURCES		= pdftops.cc parseargs.c
+pdftotext_SOURCES	= pdftotext.cc parseargs.c
+pdftohtml_SOURCES	= pdftohtml.cc parseargs.c	\
+	HtmlFonts.cc HtmlLinks.cc HtmlOutputDev.cc
diff --git a/utils/parseargs.c b/utils/parseargs.c
new file mode 100644
index 00000000..9f579436
--- /dev/null
+++ b/utils/parseargs.c
@@ -0,0 +1,190 @@
+/*
+ * parseargs.c
+ *
+ * Command line argument parser.
+ *
+ * Copyright 1996-2003 Glyph & Cog, LLC
+ */
+
+#include <stdio.h>
+#include <stddef.h>
+#include <string.h>
+#include <stdlib.h>
+#include <ctype.h>
+#include "parseargs.h"
+
+static ArgDesc *findArg(ArgDesc *args, char *arg);
+static GBool grabArg(ArgDesc *arg, int i, int *argc, char *argv[]);
+
+/*
+ * Scan argv[1..] for switches listed in <args>.  Every recognized
+ * switch (and its value, if any) is removed from argv and *argc is
+ * reduced accordingly; unrecognized words are left in place.  A "--"
+ * is removed and ends the scan.  Returns gFalse if any recognized
+ * switch could not be parsed.
+ */
+GBool parseArgs(ArgDesc *args, int *argc, char *argv[]) {
+  ArgDesc *desc;
+  GBool allOk;
+  int pos, k;
+
+  allOk = gTrue;
+  for (pos = 1; pos < *argc; ) {
+    if (strcmp(argv[pos], "--") == 0) {
+      /* drop the "--" itself and stop parsing */
+      --*argc;
+      for (k = pos; k < *argc; ++k) {
+	argv[k] = argv[k+1];
+      }
+      break;
+    }
+
+    desc = findArg(args, argv[pos]);
+    if (!desc) {
+      /* not one of ours: leave it for the caller */
+      ++pos;
+      continue;
+    }
+
+    /* grabArg() shifts argv down, so <pos> already names the next word */
+    if (!grabArg(desc, pos, argc, argv)) {
+      allOk = gFalse;
+    }
+  }
+  return allOk;
+}
+
+/*
+ * Print a usage message on stderr: a "Usage:" line built from
+ * <program> and the optional <otherArgs>, then one line per entry of
+ * <args> with the switch, a type tag and the usage text, padded so the
+ * descriptions line up.
+ */
+void printUsage(char *program, char *otherArgs, ArgDesc *args) {
+  ArgDesc *d;
+  char *label;
+  int widest, len, pad;
+
+  /* length of the longest switch name */
+  widest = 0;
+  for (d = args; d->arg; ++d) {
+    len = strlen(d->arg);
+    if (len > widest) {
+      widest = len;
+    }
+  }
+
+  fprintf(stderr, "Usage: %s [options]", program);
+  if (otherArgs) {
+    fprintf(stderr, " %s", otherArgs);
+  }
+  fprintf(stderr, "\n");
+
+  for (d = args; d->arg; ++d) {
+    fprintf(stderr, "  %s", d->arg);
+    pad = 9 + widest - strlen(d->arg);
+
+    if (d->kind == argInt || d->kind == argIntDummy) {
+      label = " <int>";
+    } else if (d->kind == argFP || d->kind == argFPDummy) {
+      label = " <fp>";
+    } else if (d->kind == argString || d->kind == argStringDummy) {
+      label = " <string>";
+    } else {
+      /* flags and anything unknown take no value */
+      label = "";
+    }
+
+    fprintf(stderr, "%-*s", pad, label);
+    if (d->usage) {
+      fprintf(stderr, ": %s", d->usage);
+    }
+    fprintf(stderr, "\n");
+  }
+}
+
+/*
+ * Return the first non-dummy entry of <args> whose switch equals
+ * <arg>, or NULL if there is none.  The table ends at the entry whose
+ * <arg> field is NULL.
+ */
+static ArgDesc *findArg(ArgDesc *args, char *arg) {
+  ArgDesc *cur;
+
+  cur = args;
+  while (cur->arg) {
+    /* kinds from argFlagDummy on are usage-only and never match */
+    if (cur->kind < argFlagDummy && strcmp(cur->arg, arg) == 0) {
+      return cur;
+    }
+    ++cur;
+  }
+  return NULL;
+}
+
+/*
+ * Store the value of switch argv[i], described by <arg>, into
+ * arg->val and remove the switch (plus its value, when one was
+ * consumed) from argv, adjusting *argc.  Returns gFalse when a
+ * required value is missing or malformed; in that case only the switch
+ * itself is removed.
+ */
+static GBool grabArg(ArgDesc *arg, int i, int *argc, char *argv[]) {
+  GBool success;
+  GBool hasNext;
+  int consumed;
+  int k;
+
+  success = gTrue;
+  consumed = 1;			/* the switch itself */
+  hasNext = i + 1 < *argc;
+
+  switch (arg->kind) {
+  case argFlag:
+    *(GBool *)arg->val = gTrue;
+    break;
+  case argInt:
+    if (hasNext && isInt(argv[i+1])) {
+      *(int *)arg->val = atoi(argv[i+1]);
+      consumed = 2;
+    } else {
+      success = gFalse;
+    }
+    break;
+  case argFP:
+    if (hasNext && isFP(argv[i+1])) {
+      *(double *)arg->val = atof(argv[i+1]);
+      consumed = 2;
+    } else {
+      success = gFalse;
+    }
+    break;
+  case argString:
+    if (hasNext) {
+      /* truncate to the destination buffer and always terminate */
+      strncpy((char *)arg->val, argv[i+1], arg->size - 1);
+      ((char *)arg->val)[arg->size - 1] = '\0';
+      consumed = 2;
+    } else {
+      success = gFalse;
+    }
+    break;
+  default:
+    fprintf(stderr, "Internal error in arg table\n");
+    break;
+  }
+
+  /* close the gap left by the consumed words */
+  *argc -= consumed;
+  for (k = i; k < *argc; ++k) {
+    argv[k] = argv[k + consumed];
+  }
+  return success;
+}
+
+/*
+ * Return gTrue if <s> is an optional sign followed by one or more
+ * decimal digits and nothing else.  Strings without any digit ("",
+ * "+", "-") are rejected.
+ */
+GBool isInt(char *s) {
+  if (*s == '-' || *s == '+')
+    ++s;
+  /* the cast keeps isdigit() defined for chars with the high bit set */
+  if (!isdigit((unsigned char)*s))
+    return gFalse;
+  do {
+    ++s;
+  } while (isdigit((unsigned char)*s));
+  if (*s)
+    return gFalse;
+  return gTrue;
+}
+
+/*
+ * Return gTrue if <s> is a floating point number: an optional sign,
+ * digits with an optional '.', and an optional exponent ('e' or 'E',
+ * optional sign, one or more digits).  At least one digit is required
+ * before the exponent, so "", "+" and "." are rejected.
+ */
+GBool isFP(char *s) {
+  int n;			/* digits seen in the mantissa */
+
+  if (*s == '-' || *s == '+')
+    ++s;
+  n = 0;
+  /* the casts keep isdigit() defined for chars with the high bit set */
+  while (isdigit((unsigned char)*s)) {
+    ++s;
+    ++n;
+  }
+  if (*s == '.')
+    ++s;
+  while (isdigit((unsigned char)*s)) {
+    ++s;
+    ++n;
+  }
+  if (n == 0)
+    return gFalse;
+  if (*s == 'e' || *s == 'E') {
+    ++s;
+    if (*s == '-' || *s == '+')
+      ++s;
+    if (!isdigit((unsigned char)*s))
+      return gFalse;
+    do {
+      ++s;
+    } while (isdigit((unsigned char)*s));
+  }
+  if (*s)
+    return gFalse;
+  return gTrue;
+}
diff --git a/utils/parseargs.h b/utils/parseargs.h
new file mode 100644
index 00000000..1b1c570e
--- /dev/null
+++ b/utils/parseargs.h
@@ -0,0 +1,71 @@
+/*
+ * parseargs.h
+ *
+ * Command line argument parser.
+ *
+ * Copyright 1996-2003 Glyph & Cog, LLC
+ */
+
+#ifndef PARSEARGS_H
+#define PARSEARGS_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "goo/gtypes.h"
+
+/*
+ * Argument kinds.
+ */
+typedef enum {
+  argFlag,			/* flag (present / not-present) */
+				/*   [val: GBool *]             */
+  argInt,			/* integer arg    */
+				/*   [val: int *] */
+  argFP,			/* floating point arg */
+				/*   [val: double *]  */
+  argString,			/* string arg      */
+				/*   [val: char *] */
+  /* dummy entries -- these show up in the usage listing only; */
+  /* useful for X args, for example.  They must stay after the real kinds: findArg() in parseargs.c matches only kind < argFlagDummy. */
+  argFlagDummy,
+  argIntDummy,
+  argFPDummy,
+  argStringDummy
+} ArgKind;
+
+/*
+ * Argument descriptor.
+ */
+typedef struct {
+  char *arg;			/* the command line switch; NULL marks the end of a table */
+  ArgKind kind;			/* kind of arg */
+  void *val;			/* place to store value */
+  int size;			/* for argString: size of the val buffer, terminating NUL included */
+  char *usage;			/* usage string; may be NULL (printUsage then prints none) */
+} ArgDesc;
+
+/*
+ * Parse command line.  Removes all args which are found in the arg
+ * descriptor list <args>.  Stops parsing if "--" is found (and removes
+ * it).  Returns gFalse if there was an error.
+ */
+extern GBool parseArgs(ArgDesc *args, int *argc, char *argv[]);
+
+/*
+ * Print usage message, based on arg descriptor list.
+ */
+extern void printUsage(char *program, char *otherArgs, ArgDesc *args);
+
+/*
+ * Check if a string is a valid integer or floating point number.
+ */
+extern GBool isInt(char *s);
+extern GBool isFP(char *s);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/utils/pdffonts.1 b/utils/pdffonts.1
new file mode 100644
index 00000000..73246903
--- /dev/null
+++ b/utils/pdffonts.1
@@ -0,0 +1,128 @@
+.\" Copyright 1999-2004 Glyph & Cog, LLC
+.TH pdffonts 1 "22 January 2004"
+.SH NAME
+pdffonts \- Portable Document Format (PDF) font analyzer (version
+3.00)
+.SH SYNOPSIS
+.B pdffonts
+[options]
+.RI [ PDF-file ]
+.SH DESCRIPTION
+.B Pdffonts
+lists the fonts used in a Portable Document Format (PDF) file along
+with various information for each font.
+.PP
+The following information is listed for each font:
+.TP
+.B name
+the font name, exactly as given in the PDF file (potentially including
+a subset prefix)
+.TP
+.B type
+the font type -- see below for details
+.TP
+.B emb
+"yes" if the font is embedded in the PDF file
+.TP
+.B sub
+"yes" if the font is a subset
+.TP
+.B uni
+"yes" if there is an explicit "ToUnicode" map in the PDF file (the
+absence of a ToUnicode map doesn't necessarily mean that the text
+can't be converted to Unicode)
+.TP
+.B object ID
+the font dictionary object ID (number and generation)
+.PP
+PDF files can contain the following types of fonts:
+.PP
+.RS
+Type 1
+.RE
+.RS
+Type 1C -- aka Compact Font Format (CFF)
+.RE
+.RS
+Type 3
+.RE
+.RS
+TrueType
+.RE
+.RS
+CID Type 0 -- 16-bit font with no specified type
+.RE
+.RS
+CID Type 0C -- 16-bit PostScript CFF font
+.RE
+.RS
+CID TrueType -- 16-bit TrueType font
+.RE
+.SH CONFIGURATION FILE
+Pdffonts reads a configuration file at startup.  It first tries to
+find the user's private config file, ~/.xpdfrc.  If that doesn't
+exist, it looks for a system-wide config file, /etc/xpdf/xpdfrc.  See the
+.BR xpdfrc (5)
+man page for details.
+.SH OPTIONS
+Many of the following options can be set with configuration file
+commands.  These are listed in square brackets with the description of
+the corresponding command line option.
+.TP
+.BI \-f " number"
+Specifies the first page to analyze.
+.TP
+.BI \-l " number"
+Specifies the last page to analyze.
+.TP
+.BI \-opw " password"
+Specify the owner password for the PDF file.  Providing this will
+bypass all security restrictions.
+.TP
+.BI \-upw " password"
+Specify the user password for the PDF file.
+.TP
+.BI \-cfg " config-file"
+Read
+.I config-file
+in place of ~/.xpdfrc or the system-wide config file.
+.TP
+.B \-v
+Print copyright and version information.
+.TP
+.B \-h
+Print usage information.
+.RB ( \-help
+and
+.B \-\-help
+are equivalent.)
+.SH EXIT CODES
+The Xpdf tools use the following exit codes:
+.TP
+0
+No error.
+.TP
+1
+Error opening a PDF file.
+.TP
+2
+Error opening an output file.
+.TP
+3
+Error related to PDF permissions.
+.TP
+99
+Other error.
+.SH AUTHOR
+The pdffonts software and documentation are copyright 1996-2004 Glyph
+& Cog, LLC.
+.SH "SEE ALSO"
+.BR xpdf (1),
+.BR pdftops (1),
+.BR pdftotext (1),
+.BR pdfinfo (1),
+.BR pdftoppm (1),
+.BR pdfimages (1),
+.BR xpdfrc (5)
+.br
+.B http://www.foolabs.com/xpdf/
diff --git a/utils/pdffonts.cc b/utils/pdffonts.cc
new file mode 100644
index 00000000..e4530d22
--- /dev/null
+++ b/utils/pdffonts.cc
@@ -0,0 +1,294 @@
+//========================================================================
+//
+// pdffonts.cc
+//
+// Copyright 2001-2003 Glyph & Cog, LLC
+//
+//========================================================================
+
+#include <poppler-config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <string.h>
+#include <math.h>
+#include "parseargs.h"
+#include "goo/GooString.h"
+#include "goo/gmem.h"
+#include "GlobalParams.h"
+#include "Error.h"
+#include "Object.h"
+#include "Dict.h"
+#include "GfxFont.h"
+#include "Annot.h"
+#include "PDFDoc.h"
+#include "config.h"
+
+static char *fontTypeNames[] = {	// indexed by font->getType() in scanFont()
+  "unknown",
+  "Type 1",
+  "Type 1C",
+  "Type 3",
+  "TrueType",
+  "CID Type 0",
+  "CID Type 0C",
+  "CID TrueType"
+};
+
+static void scanFonts(Dict *resDict, PDFDoc *doc);
+static void scanFont(GfxFont *font, PDFDoc *doc);
+
+static int firstPage = 1;
+static int lastPage = 0;	// main() replaces values < 1 with the page count
+static char ownerPassword[33] = "\001";	// leading '\001' = not given on the command line
+static char userPassword[33] = "\001";	// leading '\001' = not given on the command line
+static char cfgFileName[256] = "";
+static GBool printVersion = gFalse;
+static GBool printHelp = gFalse;
+
+static ArgDesc argDesc[] = {	// command line switches handed to parseArgs()/printUsage()
+  {"-f",      argInt,      &firstPage,     0,
+   "first page to examine"},
+  {"-l",      argInt,      &lastPage,      0,
+   "last page to examine"},
+  {"-opw",    argString,   ownerPassword,  sizeof(ownerPassword),
+   "owner password (for encrypted files)"},
+  {"-upw",    argString,   userPassword,   sizeof(userPassword),
+   "user password (for encrypted files)"},
+  {"-cfg",        argString,      cfgFileName,    sizeof(cfgFileName),
+   "configuration file to use in place of .xpdfrc"},
+  {"-v",      argFlag,     &printVersion,  0,
+   "print copyright and version info"},
+  {"-h",      argFlag,     &printHelp,     0,
+   "print usage information"},
+  {"-help",   argFlag,     &printHelp,     0,
+   "print usage information"},
+  {"--help",  argFlag,     &printHelp,     0,
+   "print usage information"},
+  {"-?",      argFlag,     &printHelp,     0,
+   "print usage information"},
+  {NULL}	// end-of-table marker (arg == NULL)
+};
+
+static Ref *fonts;	// object IDs of the fonts already printed by scanFont()
+static int fontsLen;	// number of entries used in <fonts>
+static int fontsSize;	// number of entries allocated for <fonts>
+
+int main(int argc, char *argv[]) {	// lists the fonts used by pages firstPage..lastPage of argv[1]
+  PDFDoc *doc;
+  GooString *fileName;
+  GooString *ownerPW, *userPW;
+  GBool ok;
+  Page *page;
+  Dict *resDict;
+  Annots *annots;
+  Object obj1, obj2;
+  int pg, i;
+  int exitCode;
+
+  exitCode = 99;	// "other error" unless changed below
+
+  // parse args; exactly one non-option argument (the PDF file) must remain
+  ok = parseArgs(argDesc, &argc, argv);
+  if (!ok || argc != 2 || printVersion || printHelp) {
+    fprintf(stderr, "pdffonts version %s\n", xpdfVersion);
+    fprintf(stderr, "%s\n", xpdfCopyright);
+    if (!printVersion) {
+      printUsage("pdffonts", "<PDF-file>", argDesc);
+    }
+    goto err0;	// nothing allocated yet; exit code stays 99
+  }
+  fileName = new GooString(argv[1]);
+
+  // read config file
+  globalParams = new GlobalParams(cfgFileName);
+
+  // open PDF file; a password is passed only if it was given on the command line
+  if (ownerPassword[0] != '\001') {
+    ownerPW = new GooString(ownerPassword);
+  } else {
+    ownerPW = NULL;
+  }
+  if (userPassword[0] != '\001') {
+    userPW = new GooString(userPassword);
+  } else {
+    userPW = NULL;
+  }
+  doc = new PDFDoc(fileName, ownerPW, userPW);
+  if (userPW) {
+    delete userPW;
+  }
+  if (ownerPW) {
+    delete ownerPW;
+  }
+  if (!doc->isOk()) {
+    exitCode = 1;
+    goto err1;
+  }
+
+  // get page range, clamped to [1, number of pages]
+  if (firstPage < 1) {
+    firstPage = 1;
+  }
+  if (lastPage < 1 || lastPage > doc->getNumPages()) {
+    lastPage = doc->getNumPages();
+  }
+
+  // scan the fonts: page resources first, then annotation appearance streams
+  printf("name                                 type         emb sub uni object ID\n");
+  printf("------------------------------------ ------------ --- --- --- ---------\n");
+  fonts = NULL;
+  fontsLen = fontsSize = 0;
+  for (pg = firstPage; pg <= lastPage; ++pg) {
+    page = doc->getCatalog()->getPage(pg);
+    if ((resDict = page->getResourceDict())) {
+      scanFonts(resDict, doc);
+    }
+    annots = new Annots(doc->getXRef(),
+			doc->getCatalog(),
+			page->getAnnots(&obj1));
+    obj1.free();
+    for (i = 0; i < annots->getNumAnnots(); ++i) {
+      if (annots->getAnnot(i)->getAppearance(&obj1)->isStream()) {
+	obj1.streamGetDict()->lookup("Resources", &obj2);
+	if (obj2.isDict()) {
+	  scanFonts(obj2.getDict(), doc);
+	}
+	obj2.free();
+      }
+      obj1.free();
+    }
+    delete annots;
+  }
+
+  exitCode = 0;
+
+  // clean up; the labels are entered in reverse order of allocation
+  gfree(fonts);
+ err1:
+  delete doc;
+  delete globalParams;
+ err0:
+
+  // check for memory leaks
+  Object::memCheck(stderr);
+  gMemReport(stderr);
+
+  return exitCode;
+}
+
+// Report every font in the "Font" entry of <resDict> through
+// scanFont(), then do the same for the "Resources" dictionary of each
+// stream found in the "XObject" entry of <resDict>.
+static void scanFonts(Dict *resDict, PDFDoc *doc) {
+  Object fontObj, fetched, xObjDict, xObj, resObj;
+  Ref fontDictRef;
+  GfxFontDict *fontDict;
+  GfxFont *font;
+  int idx;
+
+  // the font dictionary may be stored directly or behind a reference
+  fontDict = NULL;
+  resDict->lookupNF("Font", &fontObj);
+  if (fontObj.isDict()) {
+    fontDict = new GfxFontDict(doc->getXRef(), NULL, fontObj.getDict());
+  } else if (fontObj.isRef()) {
+    fontObj.fetch(doc->getXRef(), &fetched);
+    if (fetched.isDict()) {
+      fontDictRef = fontObj.getRef();
+      fontDict = new GfxFontDict(doc->getXRef(), &fontDictRef,
+				 fetched.getDict());
+    }
+    fetched.free();
+  }
+
+  if (fontDict) {
+    for (idx = 0; idx < fontDict->getNumFonts(); ++idx) {
+      font = fontDict->getFont(idx);
+      if (font) {
+	scanFont(font, doc);
+      }
+    }
+    delete fontDict;
+  }
+  fontObj.free();
+
+  // descend into the resources of the XObject streams
+  resDict->lookup("XObject", &xObjDict);
+  if (xObjDict.isDict()) {
+    for (idx = 0; idx < xObjDict.dictGetLength(); ++idx) {
+      xObjDict.dictGetVal(idx, &xObj);
+      if (xObj.isStream()) {
+	xObj.streamGetDict()->lookup("Resources", &resObj);
+	if (resObj.isDict()) {
+	  scanFonts(resObj.getDict(), doc);
+	}
+	resObj.free();
+      }
+      xObj.free();
+    }
+  }
+  xObjDict.free();
+}
+
+// Print one line describing <font> (name, type, embedded, subset,
+// ToUnicode, object ID) and remember its object ID in the file-level
+// <fonts> list.  Fonts whose ID is already in the list are skipped.
+static void scanFont(GfxFont *font, PDFDoc *doc) {
+  Ref id, embID;
+  Object dictObj, mapObj;
+  GooString *fontName;
+  GBool embedded, isSubset, toUnicode;
+  int k, prefixLen;
+
+  id = *font->getID();
+
+  // skip fonts that were listed before
+  k = 0;
+  while (k < fontsLen) {
+    if (fonts[k].num == id.num && fonts[k].gen == id.gen) {
+      return;
+    }
+    ++k;
+  }
+
+  fontName = font->getOrigName();
+
+  // Type 3 fonts are always reported as embedded
+  if (font->getType() != fontType3) {
+    embedded = font->getEmbeddedFontID(&embID);
+  } else {
+    embedded = gTrue;
+  }
+
+  // a ToUnicode entry counts only if it is a stream
+  toUnicode = gFalse;
+  if (doc->getXRef()->fetch(id.num, id.gen, &dictObj)->isDict()) {
+    toUnicode = dictObj.dictLookup("ToUnicode", &mapObj)->isStream();
+    mapObj.free();
+  }
+  dictObj.free();
+
+  // subset names are one or more capital letters followed by '+'
+  isSubset = gFalse;
+  if (fontName) {
+    prefixLen = 0;
+    while (prefixLen < fontName->getLength() &&
+	   fontName->getChar(prefixLen) >= 'A' &&
+	   fontName->getChar(prefixLen) <= 'Z') {
+      ++prefixLen;
+    }
+    isSubset = prefixLen > 0 && prefixLen < fontName->getLength() &&
+	       fontName->getChar(prefixLen) == '+';
+  }
+
+  // print the font info
+  printf("%-36s %-12s %-3s %-3s %-3s",
+	 fontName ? fontName->getCString() : "[none]",
+	 fontTypeNames[font->getType()],
+	 embedded ? "yes" : "no",
+	 isSubset ? "yes" : "no",
+	 toUnicode ? "yes" : "no");
+  if (id.gen < 100000) {
+    printf(" %6d %2d\n", id.num, id.gen);
+  } else {
+    printf(" [none]\n");
+  }
+
+  // remember this font, growing the list 32 entries at a time
+  if (fontsLen == fontsSize) {
+    fontsSize += 32;
+    fonts = (Ref *)grealloc(fonts, fontsSize * sizeof(Ref));
+  }
+  fonts[fontsLen++] = id;
+}
diff --git a/utils/pdfimages.1 b/utils/pdfimages.1
new file mode 100644
index 00000000..c580625e
--- /dev/null
+++ b/utils/pdfimages.1
@@ -0,0 +1,96 @@
+.\" Copyright 1998-2004 Glyph & Cog, LLC
+.TH pdfimages 1 "22 January 2004"
+.SH NAME
+pdfimages \- Portable Document Format (PDF) image extractor
+(version 3.00)
+.SH SYNOPSIS
+.B pdfimages
+[options]
+.I PDF-file image-root
+.SH DESCRIPTION
+.B Pdfimages
+saves images from a Portable Document Format (PDF) file as Portable
+Pixmap (PPM), Portable Bitmap (PBM), or JPEG files.
+.PP
+Pdfimages reads the PDF file
+.IR PDF-file ,
+scans one or more pages, and writes one PPM, PBM, or JPEG file for each image,
+.IR image-root - nnn . xxx ,
+where
+.I nnn
+is the image number and
+.I xxx
+is the image type (.ppm, .pbm, .jpg).
+.SH CONFIGURATION FILE
+Pdfimages reads a configuration file at startup.  It first tries to
+find the user's private config file, ~/.xpdfrc.  If that doesn't
+exist, it looks for a system-wide config file, /etc/xpdf/xpdfrc.  See the
+.BR xpdfrc (5)
+man page for details.
+.SH OPTIONS
+Many of the following options can be set with configuration file
+commands.  These are listed in square brackets with the description of
+the corresponding command line option.
+.TP
+.BI \-f " number"
+Specifies the first page to scan.
+.TP
+.BI \-l " number"
+Specifies the last page to scan.
+.TP
+.B \-j
+Normally, all images are written as PBM (for monochrome images) or PPM
+(for non-monochrome images) files.  With this option, images in DCT
+format are saved as JPEG files.  All non-DCT images are saved in
+PBM/PPM format as usual.
+.TP
+.BI \-opw " password"
+Specify the owner password for the PDF file.  Providing this will
+bypass all security restrictions.
+.TP
+.BI \-upw " password"
+Specify the user password for the PDF file.
+.TP
+.B \-q
+Don't print any messages or errors.
+.RB "[config file: " errQuiet ]
+.TP
+.B \-v
+Print copyright and version information.
+.TP
+.B \-h
+Print usage information.
+.RB ( \-help
+and
+.B \-\-help
+are equivalent.)
+.SH EXIT CODES
+The Xpdf tools use the following exit codes:
+.TP
+0
+No error.
+.TP
+1
+Error opening a PDF file.
+.TP
+2
+Error opening an output file.
+.TP
+3
+Error related to PDF permissions.
+.TP
+99
+Other error.
+.SH AUTHOR
+The pdfimages software and documentation are copyright 1998-2004 Glyph
+& Cog, LLC.
+.SH "SEE ALSO"
+.BR xpdf (1),
+.BR pdftops (1),
+.BR pdftotext (1),
+.BR pdfinfo (1),
+.BR pdffonts (1),
+.BR pdftoppm (1),
+.BR xpdfrc (5)
+.br
+.B http://www.foolabs.com/xpdf/
diff --git a/utils/pdfimages.cc b/utils/pdfimages.cc
new file mode 100644
index 00000000..be020ec8
--- /dev/null
+++ b/utils/pdfimages.cc
@@ -0,0 +1,159 @@
+//========================================================================
+//
+// pdfimages.cc
+//
+// Copyright 1998-2003 Glyph & Cog, LLC
+//
+// Modified for Debian by Hamish Moffatt, 22 May 2002.
+//
+//========================================================================
+
+#include <poppler-config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <string.h>
+#include "parseargs.h"
+#include "goo/GooString.h"
+#include "goo/gmem.h"
+#include "GlobalParams.h"
+#include "Object.h"
+#include "Stream.h"
+#include "Array.h"
+#include "Dict.h"
+#include "XRef.h"
+#include "Catalog.h"
+#include "Page.h"
+#include "PDFDoc.h"
+#include "ImageOutputDev.h"
+#include "Error.h"
+#include "config.h"
+
+static int firstPage = 1;
+static int lastPage = 0; // any value below 1 is replaced by the document's page count in main()
+static GBool dumpJPEG = gFalse;
+static char ownerPassword[33] = "\001"; // a leading \001 means "not given": main() passes NULL instead
+static char userPassword[33] = "\001"; // same \001 sentinel as ownerPassword
+static GBool quiet = gFalse;
+static char cfgFileName[256] = ""; // handed to the GlobalParams constructor in main()
+static GBool printVersion = gFalse;
+static GBool printHelp = gFalse;
+
+static ArgDesc argDesc[] = { // option table handed to parseArgs() and printUsage()
+  {"-f",      argInt,      &firstPage,     0,
+   "first page to convert"},
+  {"-l",      argInt,      &lastPage,      0,
+   "last page to convert"},
+  {"-j",      argFlag,     &dumpJPEG,      0,
+   "write JPEG images as JPEG files"},
+  {"-opw",    argString,   ownerPassword,  sizeof(ownerPassword),
+   "owner password (for encrypted files)"},
+  {"-upw",    argString,   userPassword,   sizeof(userPassword),
+   "user password (for encrypted files)"},
+  {"-q",      argFlag,     &quiet,         0,
+   "don't print any messages or errors"},
+  {"-cfg",        argString,      cfgFileName,    sizeof(cfgFileName),
+   "configuration file to use in place of .xpdfrc"},
+  {"-v",      argFlag,     &printVersion,  0,
+   "print copyright and version info"},
+  {"-h",      argFlag,     &printHelp,     0, // -h, -help, --help and -? all set the same flag
+   "print usage information"},
+  {"-help",   argFlag,     &printHelp,     0,
+   "print usage information"},
+  {"--help",  argFlag,     &printHelp,     0,
+   "print usage information"},
+  {"-?",      argFlag,     &printHelp,     0,
+   "print usage information"},
+  {NULL} // end-of-table marker
+};
+
+int main(int argc, char *argv[]) { // pdfimages entry point; the return value is the process exit code
+  PDFDoc *doc;
+  GooString *fileName;
+  char *imgRoot;
+  GooString *ownerPW, *userPW;
+  ImageOutputDev *imgOut;
+  GBool ok;
+  int exitCode;
+
+  exitCode = 99; // "other error" until a more specific code is assigned below
+
+  // parse args: exactly two positional arguments are required
+  ok = parseArgs(argDesc, &argc, argv);
+  if (!ok || argc != 3 || printVersion || printHelp) {
+    fprintf(stderr, "pdfimages version %s\n", xpdfVersion);
+    fprintf(stderr, "%s\n", xpdfCopyright);
+    if (!printVersion) {
+      printUsage("pdfimages", "<PDF-file> <image-root>", argDesc);
+    }
+    goto err0; // note: -v and -h also leave through here, with exit code 99
+  }
+  fileName = new GooString(argv[1]); // never deleted here -- NOTE(review): presumably owned by PDFDoc, confirm
+  imgRoot = argv[2];
+
+  // read config file
+  globalParams = new GlobalParams(cfgFileName);
+  if (quiet) {
+    globalParams->setErrQuiet(quiet);
+  }
+
+  // open PDF file (a password is passed only if given on the command line)
+  if (ownerPassword[0] != '\001') {
+    ownerPW = new GooString(ownerPassword);
+  } else {
+    ownerPW = NULL;
+  }
+  if (userPassword[0] != '\001') {
+    userPW = new GooString(userPassword);
+  } else {
+    userPW = NULL;
+  }
+  doc = new PDFDoc(fileName, ownerPW, userPW);
+  if (userPW) {
+    delete userPW;
+  }
+  if (ownerPW) {
+    delete ownerPW;
+  }
+  if (!doc->isOk()) {
+    exitCode = 1;
+    goto err1;
+  }
+
+  // check for copy permission (compiled in only when ENFORCE_PERMISSIONS is defined)
+#ifdef ENFORCE_PERMISSIONS
+  if (!doc->okToCopy()) {
+    error(-1, "Copying of images from this document is not allowed.");
+    exitCode = 3;
+    goto err1;
+  }
+#endif
+
+  // get page range, clamped to [1, number of pages]
+  if (firstPage < 1)
+    firstPage = 1;
+  if (lastPage < 1 || lastPage > doc->getNumPages())
+    lastPage = doc->getNumPages();
+
+  // write image files
+  imgOut = new ImageOutputDev(imgRoot, dumpJPEG);
+  if (imgOut->isOk()) {
+      doc->displayPages(imgOut, firstPage, lastPage, 72, 72, 0, // NOTE(review): 72, 72, 0 presumably hDPI, vDPI, rotation -- confirm
+			gTrue, gFalse, gFalse);
+  }
+  delete imgOut;
+
+  exitCode = 0; // also reached when imgOut->isOk() was false
+
+  // clean up
+ err1:
+  delete doc;
+  delete globalParams;
+ err0:
+
+  // check for memory leaks
+  Object::memCheck(stderr);
+  gMemReport(stderr);
+
+  return exitCode;
+}
diff --git a/utils/pdfinfo.1 b/utils/pdfinfo.1
new file mode 100644
index 00000000..334520c8
--- /dev/null
+++ b/utils/pdfinfo.1
@@ -0,0 +1,157 @@
+.\" Copyright 1999-2004 Glyph & Cog, LLC
+.TH pdfinfo 1 "22 January 2004"
+.SH NAME
+pdfinfo \- Portable Document Format (PDF) document information
+extractor (version 3.00)
+.SH SYNOPSIS
+.B pdfinfo
+[options]
+.RI [ PDF-file ]
+.SH DESCRIPTION
+.B Pdfinfo
+prints the contents of the \'Info' dictionary (plus some other useful
+information) from a Portable Document Format (PDF) file.
+.PP
+The \'Info' dictionary contains the following values:
+.PP
+.RS
+title
+.RE
+.RS
+subject
+.RE
+.RS
+keywords
+.RE
+.RS
+author
+.RE
+.RS
+creator
+.RE
+.RS
+producer
+.RE
+.RS
+creation date
+.RE
+.RS
+modification date
+.RE
+.PP
+In addition, the following information is printed:
+.PP
+.RS
+tagged (yes/no)
+.RE
+.RS
+page count
+.RE
+.RS
+encrypted flag (yes/no)
+.RE
+.RS
+print, copy, change, and add-notes permissions (if encrypted)
+.RE
+.RS
+page size
+.RE
+.RS
+file size
+.RE
+.RS
+linearized (yes/no)
+.RE
+.RS
+PDF version
+.RE
+.RS
+metadata (only if requested)
+.RE
+.SH CONFIGURATION FILE
+Pdfinfo reads a configuration file at startup.  It first tries to find
+the user's private config file, ~/.xpdfrc.  If that doesn't exist, it
+looks for a system-wide config file, /etc/xpdf/xpdfrc.  See the
+.BR xpdfrc (5)
+man page for details.
+.SH OPTIONS
+Many of the following options can be set with configuration file
+commands.  These are listed in square brackets with the description of
+the corresponding command line option.
+.TP
+.BI \-f " number"
+Specifies the first page to examine.  If multiple pages are requested
+using the "-f" and "-l" options, the size of each requested page (and,
+optionally, the bounding boxes for each requested page) are printed.
+Otherwise, only page one is examined.
+.TP
+.BI \-l " number"
+Specifies the last page to examine.
+.TP
+.B \-box
+Prints the page box bounding boxes: MediaBox, CropBox, BleedBox,
+TrimBox, and ArtBox.
+.TP
+.B \-meta
+Prints document-level metadata.  (This is the "Metadata" stream from
+the PDF file's Catalog object.)
+.TP
+.BI \-enc " encoding-name"
+Sets the encoding to use for text output.  The
+.I encoding\-name
+must be defined with the unicodeMap command (see
+.BR xpdfrc (5)).
+This defaults to "Latin1" (which is a built-in encoding).
+.RB "[config file: " textEncoding ]
+.TP
+.BI \-opw " password"
+Specify the owner password for the PDF file.  Providing this will
+bypass all security restrictions.
+.TP
+.BI \-upw " password"
+Specify the user password for the PDF file.
+.TP
+.BI \-cfg " config-file"
+Read
+.I config-file
+in place of ~/.xpdfrc or the system-wide config file.
+.TP
+.B \-v
+Print copyright and version information.
+.TP
+.B \-h
+Print usage information.
+.RB ( \-help
+and
+.B \-\-help
+are equivalent.)
+.SH EXIT CODES
+The Xpdf tools use the following exit codes:
+.TP
+0
+No error.
+.TP
+1
+Error opening a PDF file.
+.TP
+2
+Error opening an output file.
+.TP
+3
+Error related to PDF permissions.
+.TP
+99
+Other error.
+.SH AUTHOR
+The pdfinfo software and documentation are copyright 1996-2004 Glyph &
+Cog, LLC.
+.SH "SEE ALSO"
+.BR xpdf (1),
+.BR pdftops (1),
+.BR pdftotext (1),
+.BR pdffonts (1),
+.BR pdftoppm (1),
+.BR pdfimages (1),
+.BR xpdfrc (5)
+.br
+.B http://www.foolabs.com/xpdf/
diff --git a/utils/pdfinfo.cc b/utils/pdfinfo.cc
new file mode 100644
index 00000000..3d375354
--- /dev/null
+++ b/utils/pdfinfo.cc
@@ -0,0 +1,376 @@
+//========================================================================
+//
+// pdfinfo.cc
+//
+// Copyright 1998-2003 Glyph & Cog, LLC
+//
+//========================================================================
+
+#include <poppler-config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <string.h>
+#include <time.h>
+#include <math.h>
+#include "parseargs.h"
+#include "goo/GooString.h"
+#include "goo/gmem.h"
+#include "GlobalParams.h"
+#include "Object.h"
+#include "Stream.h"
+#include "Array.h"
+#include "Dict.h"
+#include "XRef.h"
+#include "Catalog.h"
+#include "Page.h"
+#include "PDFDoc.h"
+#include "CharTypes.h"
+#include "UnicodeMap.h"
+#include "Error.h"
+#include "config.h"
+
+static void printInfoString(Dict *infoDict, char *key, char *text,
+			    UnicodeMap *uMap);
+static void printInfoDate(Dict *infoDict, char *key, char *text);
+static void printBox(char *text, PDFRectangle *box);
+
+static int firstPage = 1;
+static int lastPage = 0; // 0 = "-l not given": main() then reports in single-page form
+static GBool printBoxes = gFalse;
+static GBool printMetadata = gFalse;
+static char textEncName[128] = ""; // empty = keep the configured text encoding
+static char ownerPassword[33] = "\001"; // a leading \001 means "not given": main() passes NULL instead
+static char userPassword[33] = "\001"; // same \001 sentinel as ownerPassword
+static char cfgFileName[256] = ""; // handed to the GlobalParams constructor in main()
+static GBool printVersion = gFalse;
+static GBool printHelp = gFalse;
+
+static ArgDesc argDesc[] = { // option table handed to parseArgs() and printUsage()
+  {"-f",      argInt,      &firstPage,        0,
+   "first page to convert"},
+  {"-l",      argInt,      &lastPage,         0,
+   "last page to convert"},
+  {"-box",    argFlag,     &printBoxes,       0,
+   "print the page bounding boxes"},
+  {"-meta",   argFlag,     &printMetadata,    0,
+   "print the document metadata (XML)"},
+  {"-enc",    argString,   textEncName,    sizeof(textEncName),
+   "output text encoding name"},
+  {"-opw",    argString,   ownerPassword,  sizeof(ownerPassword),
+   "owner password (for encrypted files)"},
+  {"-upw",    argString,   userPassword,   sizeof(userPassword),
+   "user password (for encrypted files)"},
+  {"-cfg",        argString,      cfgFileName,    sizeof(cfgFileName),
+   "configuration file to use in place of .xpdfrc"},
+  {"-v",      argFlag,     &printVersion,  0,
+   "print copyright and version info"},
+  {"-h",      argFlag,     &printHelp,     0, // -h, -help, --help and -? all set the same flag
+   "print usage information"},
+  {"-help",   argFlag,     &printHelp,     0,
+   "print usage information"},
+  {"--help",  argFlag,     &printHelp,     0,
+   "print usage information"},
+  {"-?",      argFlag,     &printHelp,     0,
+   "print usage information"},
+  {NULL} // end-of-table marker
+};
+
+int main(int argc, char *argv[]) { // pdfinfo entry point; the return value is the process exit code
+  PDFDoc *doc;
+  GooString *fileName;
+  GooString *ownerPW, *userPW;
+  UnicodeMap *uMap;
+  Page *page;
+  Object info;
+  char buf[256];
+  double w, h, wISO, hISO;
+  FILE *f;
+  GooString *metadata;
+  GBool ok;
+  int exitCode;
+  int pg, i;
+  GBool multiPage;
+
+  exitCode = 99; // "other error" until a more specific code is assigned below
+
+  // parse args: exactly one positional argument is required
+  ok = parseArgs(argDesc, &argc, argv);
+  if (!ok || argc != 2 || printVersion || printHelp) {
+    fprintf(stderr, "pdfinfo version %s\n", xpdfVersion);
+    fprintf(stderr, "%s\n", xpdfCopyright);
+    if (!printVersion) {
+      printUsage("pdfinfo", "<PDF-file>", argDesc);
+    }
+    goto err0; // note: -v and -h also leave through here, with exit code 99
+  }
+  fileName = new GooString(argv[1]);
+
+  // read config file
+  globalParams = new GlobalParams(cfgFileName);
+  if (textEncName[0]) {
+    globalParams->setTextEncoding(textEncName);
+  }
+
+  // get mapping to output encoding
+  if (!(uMap = globalParams->getTextEncoding())) {
+    error(-1, "Couldn't get text encoding");
+    delete fileName; // no PDFDoc exists yet, so the name is released here
+    goto err1;
+  }
+
+  // open PDF file (a password is passed only if given on the command line)
+  if (ownerPassword[0] != '\001') {
+    ownerPW = new GooString(ownerPassword);
+  } else {
+    ownerPW = NULL;
+  }
+  if (userPassword[0] != '\001') {
+    userPW = new GooString(userPassword);
+  } else {
+    userPW = NULL;
+  }
+  doc = new PDFDoc(fileName, ownerPW, userPW);
+  if (userPW) {
+    delete userPW;
+  }
+  if (ownerPW) {
+    delete ownerPW;
+  }
+  if (!doc->isOk()) {
+    exitCode = 1;
+    goto err2;
+  }
+
+  // get page range
+  if (firstPage < 1) {
+    firstPage = 1;
+  }
+  if (lastPage == 0) {
+    multiPage = gFalse; // -l not given: single-page output format
+    lastPage = 1;
+  } else {
+    multiPage = gTrue;
+  }
+  if (lastPage < 1 || lastPage > doc->getNumPages()) {
+    lastPage = doc->getNumPages();
+  }
+
+  // print doc info
+  doc->getDocInfo(&info);
+  if (info.isDict()) {
+    printInfoString(info.getDict(), "Title",        "Title:          ", uMap);
+    printInfoString(info.getDict(), "Subject",      "Subject:        ", uMap);
+    printInfoString(info.getDict(), "Keywords",     "Keywords:       ", uMap);
+    printInfoString(info.getDict(), "Author",       "Author:         ", uMap);
+    printInfoString(info.getDict(), "Creator",      "Creator:        ", uMap);
+    printInfoString(info.getDict(), "Producer",     "Producer:       ", uMap);
+    printInfoDate(info.getDict(),   "CreationDate", "CreationDate:   ");
+    printInfoDate(info.getDict(),   "ModDate",      "ModDate:        ");
+  }
+  info.free();
+
+  // print tagging info
+  printf("Tagged:         %s\n",
+	 doc->getStructTreeRoot()->isDict() ? "yes" : "no");
+
+  // print page count
+  printf("Pages:          %d\n", doc->getNumPages());
+
+  // print encryption info
+  printf("Encrypted:      ");
+  if (doc->isEncrypted()) {
+    printf("yes (print:%s copy:%s change:%s addNotes:%s)\n",
+	   doc->okToPrint(gTrue) ? "yes" : "no",
+	   doc->okToCopy(gTrue) ? "yes" : "no",
+	   doc->okToChange(gTrue) ? "yes" : "no",
+	   doc->okToAddNotes(gTrue) ? "yes" : "no");
+  } else {
+    printf("no\n");
+  }
+
+  // print page size (nothing is printed when firstPage > lastPage)
+  for (pg = firstPage; pg <= lastPage; ++pg) {
+    w = doc->getPageMediaWidth(pg);
+    h = doc->getPageMediaHeight(pg);
+    if (multiPage) {
+      printf("Page %4d size: %g x %g pts", pg, w, h);
+    } else {
+      printf("Page size:      %g x %g pts", w, h);
+    }
+    if ((fabs(w - 612) < 0.1 && fabs(h - 792) < 0.1) ||
+	(fabs(w - 792) < 0.1 && fabs(h - 612) < 0.1)) {
+      printf(" (letter)"); // 612 x 792 pt in either orientation
+    } else {
+      hISO = sqrt(sqrt(2.0)) * 7200 / 2.54; // A0 height in points: 2^(1/4) m at 72 pt per inch
+      wISO = hISO / sqrt(2.0);
+      for (i = 0; i <= 6; ++i) { // try A0 through A6, either orientation, within 1 pt
+	if ((fabs(w - wISO) < 1 && fabs(h - hISO) < 1) ||
+	    (fabs(w - hISO) < 1 && fabs(h - wISO) < 1)) {
+	  printf(" (A%d)", i);
+	  break;
+	}
+	hISO = wISO; // the next size down: old width becomes the height
+	wISO /= sqrt(2.0);
+      }
+    }
+    printf("\n");
+  } 
+
+  // print the boxes
+  if (printBoxes) {
+    if (multiPage) {
+      for (pg = firstPage; pg <= lastPage; ++pg) {
+	page = doc->getCatalog()->getPage(pg);
+	sprintf(buf, "Page %4d MediaBox: ", pg);
+	printBox(buf, page->getMediaBox());
+	sprintf(buf, "Page %4d CropBox:  ", pg);
+	printBox(buf, page->getCropBox());
+	sprintf(buf, "Page %4d BleedBox: ", pg);
+	printBox(buf, page->getBleedBox());
+	sprintf(buf, "Page %4d TrimBox:  ", pg);
+	printBox(buf, page->getTrimBox());
+	sprintf(buf, "Page %4d ArtBox:   ", pg);
+	printBox(buf, page->getArtBox());
+      }
+    } else if (firstPage <= doc->getNumPages()) { // never ask the catalog for a page that does not exist
+      page = doc->getCatalog()->getPage(firstPage);
+      printBox("MediaBox:       ", page->getMediaBox());
+      printBox("CropBox:        ", page->getCropBox());
+      printBox("BleedBox:       ", page->getBleedBox());
+      printBox("TrimBox:        ", page->getTrimBox());
+      printBox("ArtBox:         ", page->getArtBox());
+    }
+  }
+
+  // print file size (silently skipped if the file cannot be reopened)
+#ifdef VMS
+  f = fopen(fileName->getCString(), "rb", "ctx=stm");
+#else
+  f = fopen(fileName->getCString(), "rb");
+#endif
+  if (f) {
+#if HAVE_FSEEKO
+    fseeko(f, 0, SEEK_END);
+    printf("File size:      %u bytes\n", (Guint)ftello(f)); // NOTE(review): the Guint cast truncates very large sizes
+#elif HAVE_FSEEK64
+    fseek64(f, 0, SEEK_END);
+    printf("File size:      %u bytes\n", (Guint)ftell64(f));
+#else
+    fseek(f, 0, SEEK_END);
+    printf("File size:      %d bytes\n", (int)ftell(f));
+#endif
+    fclose(f);
+  }
+
+  // print linearization info
+  printf("Optimized:      %s\n", doc->isLinearized() ? "yes" : "no");
+
+  // print PDF version
+  printf("PDF version:    %.1f\n", doc->getPDFVersion());
+
+  // print the metadata (only with -meta, and only if the document has any)
+  if (printMetadata && (metadata = doc->readMetadata())) {
+    fputs("Metadata:\n", stdout);
+    fputs(metadata->getCString(), stdout);
+    fputc('\n', stdout);
+    delete metadata;
+  }
+
+  exitCode = 0;
+
+  // clean up
+ err2:
+  uMap->decRefCnt();
+  delete doc;
+ err1:
+  delete globalParams;
+ err0:
+
+  // check for memory leaks
+  Object::memCheck(stderr);
+  gMemReport(stderr);
+
+  return exitCode;
+}
+
+static void printInfoString(Dict *infoDict, char *key, char *text,
+			    UnicodeMap *uMap) {
+  Object obj;
+  GooString *s1;
+  GBool isUnicode;
+  Unicode u;
+  char buf[8];
+  int i, n, len;
+
+  // print "<text><value>\n" if infoDict[key] is a string; otherwise nothing
+  if (infoDict->lookup(key, &obj)->isString()) {
+    fputs(text, stdout);
+    s1 = obj.getString();
+    len = s1->getLength();
+    // a leading FE FF byte pair marks 16-bit big-endian text
+    isUnicode = len >= 2 && (s1->getChar(0) & 0xff) == 0xfe &&
+		(s1->getChar(1) & 0xff) == 0xff;
+    i = isUnicode ? 2 : 0;
+    // a 16-bit unit needs two bytes: a dangling odd byte at the end is
+    // dropped rather than paired with whatever follows the string data
+    while (i + (isUnicode ? 1 : 0) < len) {
+      if (isUnicode) {
+	u = ((s1->getChar(i) & 0xff) << 8) |
+	    (s1->getChar(i+1) & 0xff);
+	i += 2;
+      } else {
+	u = s1->getChar(i) & 0xff;
+	++i;
+      }
+      n = uMap->mapUnicode(u, buf, sizeof(buf));
+      fwrite(buf, 1, n, stdout);
+    }
+    fputc('\n', stdout);
+  }
+  obj.free();
+}
+
+static void printInfoDate(Dict *infoDict, char *key, char *text) { // prints text + formatted date + newline if infoDict[key] is a string
+  Object obj;
+  char *s;
+  int year, mon, day, hour, min, sec;
+  struct tm tmStruct;
+  char buf[256];
+
+  if (infoDict->lookup(key, &obj)->isString()) {
+    fputs(text, stdout);
+    s = obj.getString()->getCString();
+    if (s[0] == 'D' && s[1] == ':') {
+      s += 2; // skip the optional "D:" prefix
+    }
+    if (sscanf(s, "%4d%2d%2d%2d%2d%2d", // YYYYMMDDHHMMSS; anything after the seconds is ignored
+	       &year, &mon, &day, &hour, &min, &sec) == 6) {
+      tmStruct.tm_year = year - 1900;
+      tmStruct.tm_mon = mon - 1;
+      tmStruct.tm_mday = day;
+      tmStruct.tm_hour = hour;
+      tmStruct.tm_min = min;
+      tmStruct.tm_sec = sec;
+      tmStruct.tm_wday = -1;
+      tmStruct.tm_yday = -1;
+      tmStruct.tm_isdst = -1;
+      // compute the tm_wday and tm_yday fields
+      if (mktime(&tmStruct) != (time_t)-1 &&
+	  strftime(buf, sizeof(buf), "%c", &tmStruct)) {
+	fputs(buf, stdout);
+      } else {
+	fputs(s, stdout); // mktime or strftime failed: fall back to the raw text
+      }
+    } else {
+      fputs(s, stdout); // not all six fields present: print the raw text
+    }
+    fputc('\n', stdout);
+  }
+  obj.free();
+}
+
+static void printBox(char *text, PDFRectangle *box) { // one line: label, then x1 y1 x2 y2 with two decimals
+  printf("%s%8.2f %8.2f %8.2f %8.2f\n",
+	 text, box->x1, box->y1, box->x2, box->y2);
+}
diff --git a/utils/pdftohtml.1 b/utils/pdftohtml.1
new file mode 100644
index 00000000..850aa840
--- /dev/null
+++ b/utils/pdftohtml.1
@@ -0,0 +1,85 @@
+.TH PDFTOHTML 1
+.\" NAME should be all caps, SECTION should be 1-8, maybe w/ subsection
+.\" other parms are allowed: see man(7), man(1)
+.SH NAME
+pdftohtml \- program to convert pdf files into html, xml and png images
+.SH SYNOPSIS
+.B pdftohtml
+.I "[options] <PDF-file> [<html-file> <xml-file>]"
+.SH "DESCRIPTION"
+This manual page documents briefly the
+.BR pdftohtml 
+command.
+This manual page was written for the Debian GNU/Linux distribution
+because the original program does not have a manual page.
+.PP
+.B pdftohtml
+is a program that converts pdf documents into html. It generates its output in 
+the current working directory.
+.SH OPTIONS
+A summary of options is included below.
+.TP
+.B \-h, \-help
+Show summary of options.
+.TP
+.B \-f <int>
+first page to print
+.TP
+.B \-l <int>
+last page to print
+.TP
+.B \-q
+don't print any messages or errors
+.TP
+.B \-v
+print copyright and version info
+.TP
+.B \-p
+exchange .pdf links with .html
+.TP
+.B \-c
+generate complex output
+.TP
+.B \-i
+ignore images
+.TP
+.B \-noframes
+generate no frames. Not supported in complex output mode.
+.TP
+.B \-stdout
+use standard output
+.TP 
+.B \-zoom <fp>
+zoom the pdf document (default 1.5)
+.TP
+.B \-xml
+output for XML post-processing
+.TP
+.B \-enc <string>
+output text encoding name
+.TP
+.B \-opw <string>
+owner password (for encrypted files)
+.TP
+.B \-upw <string>
+user password (for encrypted files)
+.TP
+.B \-hidden
+force hidden text extraction
+.TP
+.B \-dev 
+output device name for Ghostscript (png16m, jpeg etc)
+.TP
+.B \-nomerge
+do not merge paragraphs
+.TP
+.B \-nodrm
+override document DRM settings
+
+.SH AUTHOR
+
+Pdftohtml was developed by Gueorgui Ovtcharov and Rainer Dorsch. It is
+based on, and benefits a lot from, Derek Noonburg's xpdf package.
+
+This manual page was written by Søren Boll Overgaard <boll@debian.org>,
+for the Debian GNU/Linux system (but may be used by others).
diff --git a/utils/pdftohtml.cc b/utils/pdftohtml.cc
new file mode 100644
index 00000000..99fbc0a3
--- /dev/null
+++ b/utils/pdftohtml.cc
@@ -0,0 +1,429 @@
+//========================================================================
+//
+// pdftohtml.cc
+//
+//
+// Copyright 1999-2000 G. Ovtcharov
+//========================================================================
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <string.h>
+#include <dirent.h>
+#include <poppler-config.h>
+#include <time.h>
+#include "parseargs.h"
+#include "goo/GooString.h"
+#include "goo/gmem.h"
+#include "Object.h"
+#include "Stream.h"
+#include "Array.h"
+#include "Dict.h"
+#include "XRef.h"
+#include "Catalog.h"
+#include "Page.h"
+#include "PDFDoc.h"
+#include "HtmlOutputDev.h"
+#include "PSOutputDev.h"
+#include "GlobalParams.h"
+#include "Error.h"
+#include "config.h"
+#include "goo/gfile.h"
+
+#ifndef GHOSTSCRIPT
+# define GHOSTSCRIPT "gs"
+#endif
+
+static int firstPage = 1;
+static int lastPage = 0; // any value below 1 is replaced by the document's page count in main()
+static GBool rawOrder = gTrue; // overwritten with complexMode in main(); the -raw switch below is commented out
+GBool printCommands = gTrue; // cleared by -q in main()
+static GBool printHelp = gFalse;
+GBool printHtml = gFalse;
+GBool complexMode=gFalse;
+GBool ignore=gFalse;
+//char extension[5]=".png";
+double scale=1.5; // -zoom; clamped to [0.5, 3.0] in main()
+GBool noframes=gFalse;
+GBool stout=gFalse; // set by -stdout
+GBool xml=gFalse;
+GBool errQuiet=gFalse;
+GBool noDrm=gFalse;
+
+GBool showHidden = gFalse;
+GBool noMerge = gFalse;
+static char ownerPassword[33] = ""; // empty = no password given: main() passes NULL instead
+static char userPassword[33] = ""; // empty = no password given
+static char gsDevice[33] = "png16m"; // -dev; main() inserts it into the Ghostscript command line
+static GBool printVersion = gFalse;
+
+static GooString* getInfoString(Dict *infoDict, char *key);
+static GooString* getInfoDate(Dict *infoDict, char *key);
+
+static char textEncName[128] = ""; // empty = keep the configured text encoding
+
+static ArgDesc argDesc[] = { // option table handed to parseArgs() and printUsage()
+  {"-f",      argInt,      &firstPage,     0,
+   "first page to convert"},
+  {"-l",      argInt,      &lastPage,      0,
+   "last page to convert"},
+  /*{"-raw",    argFlag,     &rawOrder,      0,
+    "keep strings in content stream order"},*/
+  {"-q",      argFlag,     &errQuiet,      0,
+   "don't print any messages or errors"},
+  {"-h",      argFlag,     &printHelp,     0,
+   "print usage information"},
+  {"-help",   argFlag,     &printHelp,     0,
+   "print usage information"},
+  {"-p",      argFlag,     &printHtml,     0,
+   "exchange .pdf links by .html"}, 
+  {"-c",      argFlag,     &complexMode,          0,
+   "generate complex document"},
+  {"-i",      argFlag,     &ignore,        0,
+   "ignore images"},
+  {"-noframes", argFlag,   &noframes,      0,
+   "generate no frames"},
+  {"-stdout"  ,argFlag,    &stout,         0,
+   "use standard output"},
+  {"-zoom",   argFP,    &scale,         0,
+   "zoom the pdf document (default 1.5)"},
+  {"-xml",    argFlag,    &xml,         0,
+   "output for XML post-processing"},
+  {"-hidden", argFlag,   &showHidden,   0,
+   "output hidden text"},
+  {"-nomerge", argFlag, &noMerge, 0,
+   "do not merge paragraphs"},   
+  {"-enc",    argString,   textEncName,    sizeof(textEncName),
+   "output text encoding name"},
+  {"-dev",    argString,   gsDevice,       sizeof(gsDevice),
+   "output device name for Ghostscript (png16m, jpeg etc)"},
+  {"-v",      argFlag,     &printVersion,  0,
+   "print copyright and version info"},
+  {"-opw",    argString,   ownerPassword,  sizeof(ownerPassword),
+   "owner password (for encrypted files)"},
+  {"-upw",    argString,   userPassword,   sizeof(userPassword),
+   "user password (for encrypted files)"},
+  {"-nodrm", argFlag, &noDrm, 0,
+   "override document DRM settings"},
+  {NULL} // end-of-table marker
+};
+
+int main(int argc, char *argv[]) {
+  PDFDoc *doc = NULL;
+  GooString *fileName = NULL;
+  GooString *docTitle = NULL;
+  GooString *author = NULL, *keywords = NULL, *subject = NULL, *date = NULL;
+  GooString *htmlFileName = NULL;
+  GooString *psFileName = NULL;
+  HtmlOutputDev *htmlOut = NULL;
+  PSOutputDev *psOut = NULL;
+  GBool ok;
+  char *p;
+  char extension[16] = "png";
+  GooString *ownerPW, *userPW;
+  Object info;
+  char * extsList[] = {"png", "jpeg", "bmp", "pcx", "tiff", "pbm", NULL};
+
+  // parse args
+  ok = parseArgs(argDesc, &argc, argv);
+  if (!ok || argc < 2 || argc > 3 || printHelp || printVersion) {
+    fprintf(stderr, "pdftohtml version %s http://pdftohtml.sourceforge.net/, based on Xpdf version %s\n", "0.36", xpdfVersion);
+    fprintf(stderr, "%s\n", "Copyright 1999-2003 Gueorgui Ovtcharov and Rainer Dorsch");
+    fprintf(stderr, "%s\n\n", xpdfCopyright);
+    if (!printVersion) {
+      printUsage("pdftohtml", "<PDF-file> [<html-file> <xml-file>]", argDesc);
+    }
+    exit(1);
+  }
+ 
+  // init error file
+  //errorInit();
+
+  // read config file
+  globalParams = new GlobalParams("");
+
+  if (errQuiet) {
+    globalParams->setErrQuiet(errQuiet);
+    printCommands = gFalse; // I'm not 100% what is the differecne between them
+  }
+
+  if (textEncName[0]) {
+    globalParams->setTextEncoding(textEncName);
+    if( !globalParams->getTextEncoding() )  {
+	goto error;    
+    }
+  }
+
+  // open PDF file
+  if (ownerPassword[0]) {
+    ownerPW = new GooString(ownerPassword);
+  } else {
+    ownerPW = NULL;
+  }
+  if (userPassword[0]) {
+    userPW = new GooString(userPassword);
+  } else {
+    userPW = NULL;
+  }
+
+  fileName = new GooString(argv[1]);
+
+  doc = new PDFDoc(fileName, ownerPW, userPW);
+  if (userPW) {
+    delete userPW;
+  }
+  if (ownerPW) {
+    delete ownerPW;
+  }
+  if (!doc->isOk()) {
+    goto error;
+  }
+
+  // check for copy permission
+  if (!doc->okToCopy()) {
+    if (!noDrm) {
+      error(-1, "Copying of text from this document is not allowed.");
+      goto error;
+    }
+    fprintf(stderr, "Document has copy-protection bit set.\n");
+  }
+
+  // construct text file name
+  if (argc == 3) {
+    GooString* tmp = new GooString(argv[2]);
+    p=tmp->getCString()+tmp->getLength()-5;
+    if (!xml)
+      if (!strcmp(p, ".html") || !strcmp(p, ".HTML"))
+	htmlFileName = new GooString(tmp->getCString(),
+				   tmp->getLength() - 5);
+      else htmlFileName =new GooString(tmp);
+    else   
+      if (!strcmp(p, ".xml") || !strcmp(p, ".XML"))
+	htmlFileName = new GooString(tmp->getCString(),
+				   tmp->getLength() - 5);
+      else htmlFileName =new GooString(tmp);
+    
+    delete tmp;
+  } else {
+    p = fileName->getCString() + fileName->getLength() - 4;
+    if (!strcmp(p, ".pdf") || !strcmp(p, ".PDF"))
+      htmlFileName = new GooString(fileName->getCString(),
+				 fileName->getLength() - 4);
+    else
+      htmlFileName = fileName->copy();
+    //   htmlFileName->append(".html");
+  }
+  
+   if (scale>3.0) scale=3.0;
+   if (scale<0.5) scale=0.5;
+   
+   if (complexMode) {
+     //noframes=gFalse;
+     stout=gFalse;
+   } 
+
+   if (stout) {
+     noframes=gTrue;
+     complexMode=gFalse;
+   }
+
+   if (xml)
+   { 
+       complexMode = gTrue;
+       noframes = gTrue;
+       noMerge = gTrue;
+   }
+
+  // get page range
+  if (firstPage < 1)
+    firstPage = 1;
+  if (lastPage < 1 || lastPage > doc->getNumPages())
+    lastPage = doc->getNumPages();
+
+  doc->getDocInfo(&info);
+  if (info.isDict()) {
+    docTitle = getInfoString(info.getDict(), "Title");
+    author = getInfoString(info.getDict(), "Author");
+    keywords = getInfoString(info.getDict(), "Keywords");
+    subject = getInfoString(info.getDict(), "Subject");
+    date = getInfoDate(info.getDict(), "ModDate");
+    if( !date )
+	date = getInfoDate(info.getDict(), "CreationDate");
+  }
+  info.free();
+  if( !docTitle ) docTitle = new GooString(htmlFileName);
+
+  /* determine extensions of output backgroun images */
+  {int i;
+  for(i = 0; extsList[i]; i++)
+  {
+	  if( strstr(gsDevice, extsList[i]) != (char *) NULL )
+	  {
+		  strncpy(extension, extsList[i], sizeof(extension));
+		  break;
+	  }
+  }}
+
+  rawOrder = complexMode; // todo: figure out what exactly rawOrder do :)
+
+  // write text file
+  htmlOut = new HtmlOutputDev(htmlFileName->getCString(), 
+	  docTitle->getCString(), 
+	  author ? author->getCString() : NULL,
+	  keywords ? keywords->getCString() : NULL, 
+          subject ? subject->getCString() : NULL, 
+	  date ? date->getCString() : NULL,
+	  extension,
+	  rawOrder, 
+	  firstPage,
+	  doc->getCatalog()->getOutline()->isDict());
+  delete docTitle;
+  if( author )
+  {   
+      delete author;
+  }
+  if( keywords )
+  {
+      delete keywords;
+  }
+  if( subject )
+  {
+      delete subject;
+  }
+  if( date )
+  {
+      delete date;
+  }
+
+  if (htmlOut->isOk())
+  {
+    doc->displayPages(htmlOut, firstPage, lastPage, 72, 72, 0,
+		      gTrue, gFalse, gFalse);
+  	if (!xml)
+	{
+		htmlOut->dumpDocOutline(doc->getCatalog());
+	}
+  }
+  
+  if( complexMode && !xml && !ignore ) {
+    int h=xoutRound(htmlOut->getPageHeight()/scale);
+    int w=xoutRound(htmlOut->getPageWidth()/scale);
+    //int h=xoutRound(doc->getPageHeight(1)/scale);
+    //int w=xoutRound(doc->getPageWidth(1)/scale);
+
+    psFileName = new GooString(htmlFileName->getCString());
+    psFileName->append(".ps");
+
+    globalParams->setPSPaperWidth(w);
+    globalParams->setPSPaperHeight(h);
+    // XXX
+    // globalParams->setPSNoText(gTrue);
+    psOut = new PSOutputDev(psFileName->getCString(), doc->getXRef(),
+			    doc->getCatalog(), firstPage, lastPage, psModePS);
+    doc->displayPages(psOut, firstPage, lastPage, 72, 72, 0,
+		      gTrue, gFalse, gFalse);
+    delete psOut;
+
+    /*sprintf(buf, "%s -sDEVICE=png16m -dBATCH -dNOPROMPT -dNOPAUSE -r72 -sOutputFile=%s%%03d.png -g%dx%d -q %s", GHOSTSCRIPT, htmlFileName->getCString(), w, h,
+      psFileName->getCString());*/
+    
+    GooString *gsCmd = new GooString(GHOSTSCRIPT);
+    GooString *tw, *th, *sc;
+    gsCmd->append(" -sDEVICE=");
+	gsCmd->append(gsDevice);
+	gsCmd->append(" -dBATCH -dNOPROMPT -dNOPAUSE -r");
+    sc = GooString::fromInt(static_cast<int>(72*scale));
+    gsCmd->append(sc);
+    gsCmd->append(" -sOutputFile=");
+    gsCmd->append("\"");
+    gsCmd->append(htmlFileName);
+    gsCmd->append("%03d.");
+	gsCmd->append(extension);
+	gsCmd->append("\" -g");
+    tw = GooString::fromInt(static_cast<int>(scale*w));
+    gsCmd->append(tw);
+    gsCmd->append("x");
+    th = GooString::fromInt(static_cast<int>(scale*h));
+    gsCmd->append(th);
+    gsCmd->append(" -q \"");
+    gsCmd->append(psFileName);
+    gsCmd->append("\"");
+//    printf("running: %s\n", gsCmd->getCString());
+    if( !executeCommand(gsCmd->getCString()) && !errQuiet) {
+      error(-1, "Failed to launch Ghostscript!\n");
+    }
+    unlink(psFileName->getCString());
+    delete tw;
+    delete th;
+    delete sc;
+    delete gsCmd;
+    delete psFileName;
+  }
+  
+  delete htmlOut;
+
+  // clean up
+ error:
+  if(doc) delete doc;
+  if(globalParams) delete globalParams;
+
+  if(htmlFileName) delete htmlFileName;
+  HtmlFont::clear();
+  
+  // check for memory leaks
+  Object::memCheck(stderr);
+  gMemReport(stderr);
+
+  return 0;
+}
+
+static GooString* getInfoString(Dict *infoDict, char *key) { // returns a new copy of infoDict[key], or NULL if it is not a string
+  Object obj;
+  GooString *s1 = NULL;
+
+  if (infoDict->lookup(key, &obj)->isString()) {
+    s1 = new GooString(obj.getString()); // copied, so it stays valid after obj.free(); the caller deletes it
+  }
+  obj.free();
+  return s1;
+}
+
+static GooString* getInfoDate(Dict *infoDict, char *key) { // returns a new date string for infoDict[key], or NULL if it is not a string
+  Object obj;
+  char *s;
+  int year, mon, day, hour, min, sec;
+  struct tm tmStruct;
+  GooString *result = NULL;
+  char buf[256];
+
+  if (infoDict->lookup(key, &obj)->isString()) {
+    s = obj.getString()->getCString();
+    if (s[0] == 'D' && s[1] == ':') {
+      s += 2; // skip the optional "D:" prefix
+    }
+    if (sscanf(s, "%4d%2d%2d%2d%2d%2d", // YYYYMMDDHHMMSS; anything after the seconds is ignored
+               &year, &mon, &day, &hour, &min, &sec) == 6) {
+      tmStruct.tm_year = year - 1900;
+      tmStruct.tm_mon = mon - 1;
+      tmStruct.tm_mday = day;
+      tmStruct.tm_hour = hour;
+      tmStruct.tm_min = min;
+      tmStruct.tm_sec = sec;
+      tmStruct.tm_wday = -1;
+      tmStruct.tm_yday = -1;
+      tmStruct.tm_isdst = -1;
+      mktime(&tmStruct); // compute the tm_wday and tm_yday fields
+      if (strftime(buf, sizeof(buf), "%Y-%m-%dT%H:%M:%S+00:00", &tmStruct)) { // NOTE(review): labelled +00:00 although the zone suffix is never parsed
+	result = new GooString(buf);
+      } else {
+        result = new GooString(s); // strftime produced nothing: keep the raw text
+      }
+    } else {
+      result = new GooString(s); // not all six fields present: keep the raw text
+    }
+  }
+  obj.free();
+  return result;
+}
+
diff --git a/utils/pdftoppm.1 b/utils/pdftoppm.1
new file mode 100644
index 00000000..f4d93e3a
--- /dev/null
+++ b/utils/pdftoppm.1
@@ -0,0 +1,113 @@
+.\" Copyright 2004 Glyph & Cog, LLC
+.TH pdftoppm 1 "22 January 2004"
+.SH NAME
+pdftoppm \- Portable Document Format (PDF) to Portable Pixmap (PPM)
+converter (version 3.00)
+.SH SYNOPSIS
+.B pdftoppm
+[options]
+.I PDF-file PPM-root
+.SH DESCRIPTION
+.B Pdftoppm
+converts Portable Document Format (PDF) files to color image files in
+Portable Pixmap (PPM) format, grayscale image files in Portable
+Graymap (PGM) format, or monochrome image files in Portable Bitmap
+(PBM) format.
+.PP
+Pdftoppm reads the PDF file,
+.IR PDF-file ,
+and writes one PPM file for each page,
+.IR PPM-root - nnnnnn .ppm,
+where
+.I nnnnnn
+is the page number.
+.SH CONFIGURATION FILE
+Pdftoppm reads a configuration file at startup.  It first tries to
+find the user's private config file, ~/.xpdfrc.  If that doesn't
+exist, it looks for a system-wide config file, /etc/xpdf/xpdfrc. See the
+.BR xpdfrc (5)
+man page for details.
+.SH OPTIONS
+Many of the following options can be set with configuration file
+commands.  These are listed in square brackets with the description of
+the corresponding command line option.
+.TP
+.BI \-f " number"
+Specifies the first page to convert.
+.TP
+.BI \-l " number"
+Specifies the last page to convert.
+.TP
+.BI \-r " number"
+Specifies the resolution, in DPI.  The default is 150 DPI.
+.TP
+.B \-mono
+Generate a monochrome PBM file (instead of a color PPM file).
+.TP
+.B \-gray
+Generate a grayscale PGM file (instead of a color PPM file).
+.TP
+.BI \-t1lib " yes | no"
+Enable or disable t1lib (a Type 1 font rasterizer).  This defaults to
+"yes".
+.RB "[config file: " enableT1lib ]
+.TP
+.BI \-freetype " yes | no"
+Enable or disable FreeType (a TrueType / Type 1 font rasterizer).
+This defaults to "yes".
+.RB "[config file: " enableFreeType ]
+.TP
+.BI \-aa " yes | no"
+Enable or disable font anti-aliasing.  This defaults to "yes".
+.RB "[config file: " antialias ]
+.TP
+.BI \-opw " password"
+Specify the owner password for the PDF file.  Providing this will
+bypass all security restrictions.
+.TP
+.BI \-upw " password"
+Specify the user password for the PDF file.
+.TP
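+.B \-q
+Don't print any messages or errors.
+.RB "[config file: " errQuiet ]
+.TP
+.BI \-cfg " config-file"
+Read
+.I config-file
+in place of ~/.xpdfrc or the system-wide config file.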
+.TP
+.B \-v
+Print copyright and version information.
+.TP
+.B \-h
+Print usage information.
+.RB ( \-help
+and
+.B \-\-help
+are equivalent.)
+.SH EXIT CODES
+The Xpdf tools use the following exit codes:
+.TP
+0
+No error.
+.TP
+1
+Error opening a PDF file.
+.TP
+2
+Error opening an output file.
+.TP
+3
+Error related to PDF permissions.
+.TP
+99
+Other error.
+.SH AUTHOR
+The pdftoppm software and documentation are copyright 1996-2004 Glyph
+& Cog, LLC.
+.SH "SEE ALSO"
+.BR xpdf (1),
+.BR pdftops (1),
+.BR pdftotext (1),
+.BR pdfinfo (1),
+.BR pdffonts (1),
+.BR pdfimages (1),
+.BR xpdfrc (5)
+.br
+.B http://www.foolabs.com/xpdf/
diff --git a/utils/pdftoppm.cc b/utils/pdftoppm.cc
new file mode 100644
index 00000000..ba153b72
--- /dev/null
+++ b/utils/pdftoppm.cc
@@ -0,0 +1,189 @@
+//========================================================================
+//
+// pdftoppm.cc
+//
+// Copyright 2003 Glyph & Cog, LLC
+//
+//========================================================================
+
+#include <poppler-config.h>
+#include <stdio.h>
+#include "parseargs.h"
+#include "goo/gmem.h"
+#include "goo/GooString.h"
+#include "GlobalParams.h"
+#include "Object.h"
+#include "PDFDoc.h"
+#include "splash/SplashBitmap.h"
+#include "splash/Splash.h"
+#include "SplashOutputDev.h"
+#include "config.h"
+
+static int firstPage = 1;   // -f: first page; main() raises values below 1 to 1
+static int lastPage = 0;   // -l: last page; main() replaces values < 1 or past the end with the page count
+static int resolution = 150;   // -r: DPI, passed to displayPage() for both axes
+static GBool mono = gFalse;   // -mono: render in splashModeMono1 and use the "pbm" extension
+static GBool gray = gFalse;   // -gray: render in splashModeMono8 and use the "pgm" extension
+static char enableT1libStr[16] = "";   // -t1lib value; left empty when the option is not given
+static char enableFreeTypeStr[16] = "";   // -freetype value; left empty when the option is not given
+static char antialiasStr[16] = "";   // -aa value; left empty when the option is not given
+static char ownerPassword[33] = "";   // -opw; an empty string makes main() pass NULL to PDFDoc
+static char userPassword[33] = "";   // -upw; an empty string makes main() pass NULL to PDFDoc
+static GBool quiet = gFalse;   // -q: forwarded to globalParams->setErrQuiet()
+static char cfgFileName[256] = "";   // -cfg: handed to the GlobalParams constructor
+static GBool printVersion = gFalse;   // -v
+static GBool printHelp = gFalse;   // -h, -help, --help, -?
+
+static ArgDesc argDesc[] = {   // option table that main() hands to parseArgs() and printUsage()
+  {"-f",      argInt,      &firstPage,     0,   // entry layout: name, kind, destination, size, usage text
+   "first page to print"},
+  {"-l",      argInt,      &lastPage,      0,
+   "last page to print"},
+  {"-r",      argInt,      &resolution,    0,
+   "resolution, in DPI (default is 150)"},
+  {"-mono",   argFlag,     &mono,          0,
+   "generate a monochrome PBM file"},
+  {"-gray",   argFlag,     &gray,          0,
+   "generate a grayscale PGM file"},
+#if HAVE_T1LIB_H   // -t1lib is listed only when HAVE_T1LIB_H is non-zero
+  {"-t1lib",      argString,      enableT1libStr, sizeof(enableT1libStr),   // argString entries carry the buffer size
+   "enable t1lib font rasterizer: yes, no"},
+#endif
+#if HAVE_FREETYPE_FREETYPE_H | HAVE_FREETYPE_H   // -freetype is listed when either macro is non-zero
+  {"-freetype",   argString,      enableFreeTypeStr, sizeof(enableFreeTypeStr),
+   "enable FreeType font rasterizer: yes, no"},
+#endif
+  {"-aa",         argString,      antialiasStr,   sizeof(antialiasStr),
+   "enable font anti-aliasing: yes, no"},
+  {"-opw",    argString,   ownerPassword,  sizeof(ownerPassword),
+   "owner password (for encrypted files)"},
+  {"-upw",    argString,   userPassword,   sizeof(userPassword),
+   "user password (for encrypted files)"},
+  {"-q",      argFlag,     &quiet,         0,
+   "don't print any messages or errors"},
+  {"-cfg",        argString,      cfgFileName,    sizeof(cfgFileName),
+   "configuration file to use in place of .xpdfrc"},
+  {"-v",      argFlag,     &printVersion,  0,
+   "print copyright and version info"},
+  {"-h",      argFlag,     &printHelp,     0,   // the four help spellings all set printHelp
+   "print usage information"},
+  {"-help",   argFlag,     &printHelp,     0,
+   "print usage information"},
+  {"--help",  argFlag,     &printHelp,     0,
+   "print usage information"},
+  {"-?",      argFlag,     &printHelp,     0,
+   "print usage information"},
+  {NULL}   // NOTE(review): presumably the end-of-table marker for parseArgs() -- confirm in parseargs.c
+};
+
+int main(int argc, char *argv[]) {   // pdftoppm entry point: renders each page of the requested range to a PNM file
+  PDFDoc *doc;
+  GooString *fileName;
+  char *ppmRoot;
+  char ppmFile[512];
+  GooString *ownerPW, *userPW;
+  SplashColor paperColor;
+  SplashOutputDev *splashOut;
+  GBool ok;
+  int exitCode;
+  int pg;
+
+  exitCode = 99;   // stays 99 unless the document fails to open (1) or the page loop completes (0)
+
+  // parse args
+  ok = parseArgs(argDesc, &argc, argv);
+  if (mono && gray) {   // the two output modes cannot be combined
+    ok = gFalse;
+  }
+  if (!ok || argc != 3 || printVersion || printHelp) {   // exactly two positional arguments are required
+    fprintf(stderr, "pdftoppm version %s\n", xpdfVersion);
+    fprintf(stderr, "%s\n", xpdfCopyright);
+    if (!printVersion) {   // -v prints the banner only, without the usage text
+      printUsage("pdftoppm", "<PDF-file> <PPM-root>", argDesc);
+    }
+    goto err0;   // nothing has been allocated yet, so the deletes under err1 are skipped
+  }
+  fileName = new GooString(argv[1]);   // never deleted in this function; presumably PDFDoc takes ownership -- confirm
+  ppmRoot = argv[2];
+
+  // read config file
+  globalParams = new GlobalParams(cfgFileName);
+  globalParams->setupBaseFonts(NULL);
+  if (enableT1libStr[0]) {   // command-line values are applied only when the option was given
+    if (!globalParams->setEnableT1lib(enableT1libStr)) {
+      fprintf(stderr, "Bad '-t1lib' value on command line\n");   // reported, but the run continues
+    }
+  }
+  if (enableFreeTypeStr[0]) {
+    if (!globalParams->setEnableFreeType(enableFreeTypeStr)) {
+      fprintf(stderr, "Bad '-freetype' value on command line\n");
+    }
+  }
+  if (antialiasStr[0]) {
+    if (!globalParams->setAntialias(antialiasStr)) {
+      fprintf(stderr, "Bad '-aa' value on command line\n");
+    }
+  }
+  if (quiet) {
+    globalParams->setErrQuiet(quiet);
+  }
+
+  // open PDF file
+  if (ownerPassword[0]) {   // an empty password buffer is passed to PDFDoc as NULL
+    ownerPW = new GooString(ownerPassword);
+  } else {
+    ownerPW = NULL;
+  }
+  if (userPassword[0]) {
+    userPW = new GooString(userPassword);
+  } else {
+    userPW = NULL;
+  }
+  doc = new PDFDoc(fileName, ownerPW, userPW);
+  if (userPW) {   // the password copies are released right after the PDFDoc is constructed
+    delete userPW;
+  }
+  if (ownerPW) {
+    delete ownerPW;
+  }
+  if (!doc->isOk()) {
+    exitCode = 1;
+    goto err1;
+  }
+
+  // get page range
+  if (firstPage < 1)
+    firstPage = 1;
+  if (lastPage < 1 || lastPage > doc->getNumPages())   // the default 0 or an out-of-range value selects the final page
+    lastPage = doc->getNumPages();
+
+  // write PPM files
+  paperColor.rgb8 = splashMakeRGB8(255, 255, 255);
+  splashOut = new SplashOutputDev(mono ? splashModeMono1 :
+				    gray ? splashModeMono8 :
+				             splashModeRGB8,
+				  gFalse, paperColor);
+  splashOut->startDoc(doc->getXRef());
+  for (pg = firstPage; pg <= lastPage; ++pg) {
+    doc->displayPage(splashOut, pg, resolution, resolution, 0, gTrue, gFalse);
+    sprintf(ppmFile, "%.*s-%06d.%s",   // file name is <root>-<zero-padded page number>.<extension>
+	    (int)sizeof(ppmFile) - 32, ppmRoot, pg,   // the precision caps the root, leaving 32 bytes of ppmFile for the rest
+	    mono ? "pbm" : gray ? "pgm" : "ppm");
+    splashOut->getBitmap()->writePNMFile(ppmFile);   // any result of the write is not examined here
+  }
+  delete splashOut;
+
+  exitCode = 0;
+
+  // clean up
+ err1:
+  delete doc;
+  delete globalParams;
+ err0:
+
+  // check for memory leaks
+  Object::memCheck(stderr);
+  gMemReport(stderr);
+
+  return exitCode;
+}
diff --git a/utils/pdftops.1 b/utils/pdftops.1
new file mode 100644
index 00000000..04c5c7e1
--- /dev/null
+++ b/utils/pdftops.1
@@ -0,0 +1,224 @@
+.\" Copyright 1996-2004 Glyph & Cog, LLC
+.TH pdftops 1 "22 January 2004"
+.SH NAME
+pdftops \- Portable Document Format (PDF) to PostScript converter
+(version 3.00)
+.SH SYNOPSIS
+.B pdftops
+[options]
+.I PDF-file
+.RI [ PS-file ]
+.SH DESCRIPTION
+.B Pdftops
+converts Portable Document Format (PDF) files to PostScript so they
+can be printed.
+.PP
+Pdftops reads the PDF file,
+.IR PDF-file ,
+and writes a PostScript file,
+.IR PS-file .
+If
+.I PS-file
+is not specified, pdftops converts
+.I file.pdf
+to
+.I file.ps
+(or
+.I file.eps
+with the -eps option).  If 
+.I PS-file
+is \'-', the PostScript is sent to stdout.
+.SH CONFIGURATION FILE
+Pdftops reads a configuration file at startup.  It first tries to find
+the user's private config file, ~/.xpdfrc.  If that doesn't exist, it
+looks for a system-wide config file, /etc/xpdf/xpdfrc.  See the
+.BR xpdfrc (5)
+man page for details.
+.SH OPTIONS
+Many of the following options can be set with configuration file
+commands.  These are listed in square brackets with the description of
+the corresponding command line option.
+.TP
+.BI \-f " number"
+Specifies the first page to print.
+.TP
+.BI \-l " number"
+Specifies the last page to print.
+.TP
+.B \-level1
+Generate Level 1 PostScript.  The resulting PostScript files will be
+significantly larger (if they contain images), but will print on Level
+1 printers.  This also converts all images to black and white.  No
+more than one of the PostScript level options (-level1, -level1sep,
+-level2, -level2sep, -level3, -level3sep) may be given.
+.RB "[config file: " psLevel ]
+.TP
+.B \-level1sep
+Generate Level 1 separable PostScript.  All colors are converted to
+CMYK.  Images are written with separate stream data for the four
+components.
+.RB "[config file: " psLevel ]
+.TP
+.B \-level2
+Generate Level 2 PostScript.  Level 2 supports color images and image
+compression.  This is the default setting.
+.RB "[config file: " psLevel ]
+.TP
+.B \-level2sep
+Generate Level 2 separable PostScript.  All colors are converted to
+CMYK.  The PostScript separation convention operators are used to
+handle custom (spot) colors.
+.RB "[config file: " psLevel ]
+.TP
+.B \-level3
+Generate Level 3 PostScript.  This enables all Level 2 features plus
+CID font embedding.
+.RB "[config file: " psLevel ]
+.TP
+.B \-level3sep
+Generate Level 3 separable PostScript.  The separation handling is the
+same as for -level2sep.
+.RB "[config file: " psLevel ]
+.TP
+.B \-eps
+Generate an Encapsulated PostScript (EPS) file.  An EPS file contains
+a single image, so if you use this option with a multi-page PDF file,
+you must use -f and -l to specify a single page.  No more than one of
+the mode options (-eps, -form) may be given.
+.TP
+.B \-form
+Generate a PostScript form which can be imported by software that
+understands forms.  A form contains a single page, so if you use this
+option with a multi-page PDF file, you must use -f and -l to specify a
+single page.  The -level1 option cannot be used with -form.
+.TP
+.B \-opi
+Generate OPI comments for all images and forms which have OPI
+information.  (This option is only available if pdftops was compiled
+with OPI support.)
+.RB "[config file: " psOPI ]
+.TP
+.B \-noembt1
+By default, any Type 1 fonts which are embedded in the PDF file are
+copied into the PostScript file.  This option causes pdftops to
+substitute base fonts instead.  Embedded fonts make PostScript files
+larger, but may be necessary for readable output.
+.RB "[config file: " psEmbedType1Fonts ]
+.TP
+.B \-noembtt
+By default, any TrueType fonts which are embedded in the PDF file are
+copied into the PostScript file.  This option causes pdftops to
+substitute base fonts instead.  Embedded fonts make PostScript files
+larger, but may be necessary for readable output.  Also, some
+PostScript interpreters do not have TrueType rasterizers.
+.RB "[config file: " psEmbedTrueTypeFonts ]
+.TP
+.B \-noembcidps
+By default, any CID PostScript fonts which are embedded in the PDF
+file are copied into the PostScript file.  This option disables that
+embedding.  No attempt is made to substitute for non-embedded CID
+PostScript fonts.
+.RB "[config file: " psEmbedCIDPostScriptFonts ]
+.TP
+.B \-noembcidtt
+By default, any CID TrueType fonts which are embedded in the PDF file
+are copied into the PostScript file.  This option disables that
+embedding.  No attempt is made to substitute for non-embedded CID
+TrueType fonts.
+.RB "[config file: " psEmbedCIDTrueTypeFonts ]
+.TP
+.BI \-paper " size"
+Set the paper size to one of "letter", "legal", "A4", or "A3".  This
+can also be set to "match", which will set the paper size to match the
+size specified in the PDF file.
+.RB "[config file: " psPaperSize ]
+.TP
+.BI \-paperw " size"
+Set the paper width, in points.
+.RB "[config file: " psPaperSize ]
+.TP
+.BI \-paperh " size"
+Set the paper height, in points.
+.RB "[config file: " psPaperSize ]
+.TP
+.B \-nocrop
+By default, output is cropped to the CropBox specified in the PDF
+file.  This option disables cropping.
+.RB "[config file: " psCrop ]
+.TP
+.B \-expand
+Expand PDF pages smaller than the paper to fill the paper.  By
+default, these pages are not scaled.
+.RB "[config file: " psExpandSmaller ]
+.TP
+.B \-noshrink
+Don't scale PDF pages which are larger than the paper.  By default,
+pages larger than the paper are shrunk to fit.
+.RB "[config file: " psShrinkLarger ]
+.TP
+.B \-nocenter
+By default, PDF pages smaller than the paper (after any scaling) are
+centered on the paper.  This option causes them to be aligned to the
+lower-left corner of the paper instead.
+.RB "[config file: " psCenter ]
+.TP
+.B \-duplex
+Set the Duplex pagedevice entry in the PostScript file.  This tells
+duplex-capable printers to enable duplexing.
+.RB "[config file: " psDuplex ]
+.TP
+.BI \-opw " password"
+Specify the owner password for the PDF file.  Providing this will
+bypass all security restrictions.
+.TP
+.BI \-upw " password"
+Specify the user password for the PDF file.
+.TP
+.B \-q
+Don't print any messages or errors.
+.RB "[config file: " errQuiet ]
+.TP
+.BI \-cfg " config-file"
+Read
+.I config-file
+in place of ~/.xpdfrc or the system-wide config file.
+.TP
+.B \-v
+Print copyright and version information.
+.TP
+.B \-h
+Print usage information.
+.RB ( \-help
+and
+.B \-\-help
+are equivalent.)
+.SH EXIT CODES
+The Xpdf tools use the following exit codes:
+.TP
+0
+No error.
+.TP
+1
+Error opening a PDF file.
+.TP
+2
+Error opening an output file.
+.TP
+3
+Error related to PDF permissions.
+.TP
+99
+Other error.
+.SH AUTHOR
+The pdftops software and documentation are copyright 1996-2004 Glyph &
+Cog, LLC.
+.SH "SEE ALSO"
+.BR xpdf (1),
+.BR pdftotext (1),
+.BR pdfinfo (1),
+.BR pdffonts (1),
+.BR pdftoppm (1),
+.BR pdfimages (1),
+.BR xpdfrc (5)
+.br
+.B http://www.foolabs.com/xpdf/
diff --git a/utils/pdftops.cc b/utils/pdftops.cc
new file mode 100644
index 00000000..308a6e0b
--- /dev/null
+++ b/utils/pdftops.cc
@@ -0,0 +1,336 @@
+//========================================================================
+//
+// pdftops.cc
+//
+// Copyright 1996-2003 Glyph & Cog, LLC
+//
+// Modified for Debian by Hamish Moffatt, 22 May 2002.
+//
+//========================================================================
+
+#include <poppler-config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <string.h>
+#include "parseargs.h"
+#include "goo/GooString.h"
+#include "goo/gmem.h"
+#include "GlobalParams.h"
+#include "Object.h"
+#include "Stream.h"
+#include "Array.h"
+#include "Dict.h"
+#include "XRef.h"
+#include "Catalog.h"
+#include "Page.h"
+#include "PDFDoc.h"
+#include "PSOutputDev.h"
+#include "Error.h"
+#include "config.h"
+
+static int firstPage = 1;   // -f: first page; main() raises values below 1 to 1
+static int lastPage = 0;   // -l: last page; main() replaces values < 1 or past the end with the page count
+static GBool level1 = gFalse;   // -level1 ... -level3sep: at most one may be set (checked in main())
+static GBool level1Sep = gFalse;
+static GBool level2 = gFalse;
+static GBool level2Sep = gFalse;
+static GBool level3 = gFalse;
+static GBool level3Sep = gFalse;
+static GBool doEPS = gFalse;   // -eps: selects psModeEPS; cannot be combined with -form
+static GBool doForm = gFalse;   // -form: selects psModeForm; rejected when level < psLevel2
+#if OPI_SUPPORT
+static GBool doOPI = gFalse;   // -opi: forwarded to globalParams->setPSOPI()
+#endif
+static GBool noEmbedT1Fonts = gFalse;   // -noembt1: when set, setPSEmbedType1() receives false
+static GBool noEmbedTTFonts = gFalse;   // -noembtt: when set, setPSEmbedTrueType() receives false
+static GBool noEmbedCIDPSFonts = gFalse;   // -noembcidps: when set, setPSEmbedCIDPostScript() receives false
+static GBool noEmbedCIDTTFonts = gFalse;   // -noembcidtt: when set, setPSEmbedCIDTrueType() receives false
+static char paperSize[15] = "";   // -paper: when non-empty it is used and -paperw / -paperh are ignored
+static int paperWidth = 0;   // -paperw: applied only when non-zero
+static int paperHeight = 0;   // -paperh: applied only when non-zero
+static GBool noCrop = gFalse;   // -nocrop: calls setPSCrop(gFalse)
+static GBool expand = gFalse;   // -expand: calls setPSExpandSmaller(gTrue)
+static GBool noShrink = gFalse;   // -noshrink: calls setPSShrinkLarger(gFalse)
+static GBool noCenter = gFalse;   // -nocenter: calls setPSCenter(gFalse)
+static GBool duplex = gFalse;   // -duplex: forwarded to setPSDuplex()
+static char ownerPassword[33] = "\001";   // -opw; a leading '\001' is the "not given" marker tested in main()
+static char userPassword[33] = "\001";   // -upw; same marker, so an empty password can still be passed explicitly
+static GBool quiet = gFalse;   // -q: forwarded to globalParams->setErrQuiet()
+static char cfgFileName[256] = "";   // -cfg: handed to the GlobalParams constructor
+static GBool printVersion = gFalse;   // -v
+static GBool printHelp = gFalse;   // -h, -help, --help, -?
+
+static ArgDesc argDesc[] = {   // option table that main() hands to parseArgs() and printUsage()
+  {"-f",          argInt,      &firstPage,      0,   // entry layout: name, kind, destination, size, usage text
+   "first page to print"},
+  {"-l",          argInt,      &lastPage,       0,
+   "last page to print"},
+  {"-level1",     argFlag,     &level1,         0,   // the six level flags are mutually exclusive (enforced in main())
+   "generate Level 1 PostScript"},
+  {"-level1sep",  argFlag,     &level1Sep,      0,
+   "generate Level 1 separable PostScript"},
+  {"-level2",     argFlag,     &level2,         0,
+   "generate Level 2 PostScript"},
+  {"-level2sep",  argFlag,     &level2Sep,      0,
+   "generate Level 2 separable PostScript"},
+  {"-level3",     argFlag,     &level3,         0,
+   "generate Level 3 PostScript"},
+  {"-level3sep",  argFlag,     &level3Sep,      0,
+   "generate Level 3 separable PostScript"},
+  {"-eps",        argFlag,     &doEPS,          0,
+   "generate Encapsulated PostScript (EPS)"},
+  {"-form",       argFlag,     &doForm,         0,
+   "generate a PostScript form"},
+#if OPI_SUPPORT   // -opi is listed only when OPI_SUPPORT is non-zero
+  {"-opi",        argFlag,     &doOPI,          0,
+   "generate OPI comments"},
+#endif
+  {"-noembt1",    argFlag,     &noEmbedT1Fonts, 0,
+   "don't embed Type 1 fonts"},
+  {"-noembtt",    argFlag,     &noEmbedTTFonts, 0,
+   "don't embed TrueType fonts"},
+  {"-noembcidps", argFlag,     &noEmbedCIDPSFonts, 0,
+   "don't embed CID PostScript fonts"},
+  {"-noembcidtt", argFlag, &noEmbedCIDTTFonts,  0,
+   "don't embed CID TrueType fonts"},
+  {"-paper",      argString,   paperSize,       sizeof(paperSize),   // argString entries carry the buffer size
+   "paper size (letter, legal, A4, A3, match)"},
+  {"-paperw",     argInt,      &paperWidth,     0,
+   "paper width, in points"},
+  {"-paperh",     argInt,      &paperHeight,    0,
+   "paper height, in points"},
+  {"-nocrop",     argFlag,     &noCrop,         0,
+   "don't crop pages to CropBox"},
+  {"-expand",     argFlag,     &expand,         0,
+   "expand pages smaller than the paper size"},
+  {"-noshrink",   argFlag,     &noShrink,       0,
+   "don't shrink pages larger than the paper size"},
+  {"-nocenter",   argFlag,     &noCenter,       0,
+   "don't center pages smaller than the paper size"},
+  {"-duplex",     argFlag,     &duplex,         0,
+   "enable duplex printing"},
+  {"-opw",        argString,   ownerPassword,   sizeof(ownerPassword),
+   "owner password (for encrypted files)"},
+  {"-upw",        argString,   userPassword,    sizeof(userPassword),
+   "user password (for encrypted files)"},
+  {"-q",          argFlag,     &quiet,          0,
+   "don't print any messages or errors"},
+  {"-cfg",        argString,      cfgFileName,    sizeof(cfgFileName),
+   "configuration file to use in place of .xpdfrc"},
+  {"-v",          argFlag,     &printVersion,   0,
+   "print copyright and version info"},
+  {"-h",          argFlag,     &printHelp,      0,   // the four help spellings all set printHelp
+   "print usage information"},
+  {"-help",       argFlag,     &printHelp,      0,
+   "print usage information"},
+  {"--help",      argFlag,     &printHelp,      0,
+   "print usage information"},
+  {"-?",          argFlag,     &printHelp,      0,
+   "print usage information"},
+  {NULL}   // NOTE(review): presumably the end-of-table marker for parseArgs() -- confirm in parseargs.c
+};
+
+// pdftops entry point.  Parses the command line, applies the PostScript
+// options to globalParams, opens the PDF and writes the requested page
+// range to a PostScript (or EPS / form) file.
+//
+// Returns 0 on success, 1 if the PDF cannot be opened, 2 if the output
+// device is not ok, 3 if printing is refused (ENFORCE_PERMISSIONS builds
+// only) and 99 for any other error.  Usage errors and conflicting options
+// are reported on stderr and leave via exit(1) before anything has been
+// allocated.
+int main(int argc, char *argv[]) {
+  PDFDoc *doc;
+  GooString *fileName;
+  GooString *psFileName;
+  PSLevel level;
+  PSOutMode mode;
+  GooString *ownerPW, *userPW;
+  PSOutputDev *psOut;
+  GBool ok;
+  char *p;
+  int exitCode;
+
+  exitCode = 99;
+
+  // parse args
+  ok = parseArgs(argDesc, &argc, argv);
+  if (!ok || argc < 2 || argc > 3 || printVersion || printHelp) {
+    fprintf(stderr, "pdftops version %s\n", xpdfVersion);
+    fprintf(stderr, "%s\n", xpdfCopyright);
+    if (!printVersion) {
+      printUsage("pdftops", "<PDF-file> [<PS-file>]", argDesc);
+    }
+    exit(1);
+  }
+  // at most one of the six level flags may be set
+  if ((level1 ? 1 : 0) +
+      (level1Sep ? 1 : 0) +
+      (level2 ? 1 : 0) +
+      (level2Sep ? 1 : 0) +
+      (level3 ? 1 : 0) +
+      (level3Sep ? 1 : 0) > 1) {
+    fprintf(stderr, "Error: use only one of the 'level' options.\n");
+    exit(1);
+  }
+  if (doEPS && doForm) {
+    fprintf(stderr, "Error: use only one of -eps and -form\n");
+    exit(1);
+  }
+  // -level2 needs no branch of its own: psLevel2 is the fallback
+  if (level1) {
+    level = psLevel1;
+  } else if (level1Sep) {
+    level = psLevel1Sep;
+  } else if (level2Sep) {
+    level = psLevel2Sep;
+  } else if (level3) {
+    level = psLevel3;
+  } else if (level3Sep) {
+    level = psLevel3Sep;
+  } else {
+    level = psLevel2;
+  }
+  if (doForm && level < psLevel2) {
+    fprintf(stderr, "Error: forms are only available with Level 2 output.\n");
+    exit(1);
+  }
+  mode = doEPS ? psModeEPS
+               : doForm ? psModeForm
+                        : psModePS;
+  fileName = new GooString(argv[1]);
+
+  // read config file
+  globalParams = new GlobalParams(cfgFileName);
+  if (paperSize[0]) {
+    if (!globalParams->setPSPaperSize(paperSize)) {
+      fprintf(stderr, "Invalid paper size\n");
+      // no PDFDoc exists yet, so fileName is released here
+      delete fileName;
+      goto err0;
+    }
+  } else {
+    // explicit width / height are honoured only without -paper
+    if (paperWidth) {
+      globalParams->setPSPaperWidth(paperWidth);
+    }
+    if (paperHeight) {
+      globalParams->setPSPaperHeight(paperHeight);
+    }
+  }
+  if (noCrop) {
+    globalParams->setPSCrop(gFalse);
+  }
+  if (expand) {
+    globalParams->setPSExpandSmaller(gTrue);
+  }
+  if (noShrink) {
+    globalParams->setPSShrinkLarger(gFalse);
+  }
+  if (noCenter) {
+    globalParams->setPSCenter(gFalse);
+  }
+  if (duplex) {
+    globalParams->setPSDuplex(duplex);
+  }
+  // the configured level is overridden only when a level flag was given
+  if (level1 || level1Sep || level2 || level2Sep || level3 || level3Sep) {
+    globalParams->setPSLevel(level);
+  }
+  if (noEmbedT1Fonts) {
+    globalParams->setPSEmbedType1(!noEmbedT1Fonts);
+  }
+  if (noEmbedTTFonts) {
+    globalParams->setPSEmbedTrueType(!noEmbedTTFonts);
+  }
+  if (noEmbedCIDPSFonts) {
+    globalParams->setPSEmbedCIDPostScript(!noEmbedCIDPSFonts);
+  }
+  if (noEmbedCIDTTFonts) {
+    globalParams->setPSEmbedCIDTrueType(!noEmbedCIDTTFonts);
+  }
+#if OPI_SUPPORT
+  if (doOPI) {
+    globalParams->setPSOPI(doOPI);
+  }
+#endif
+  if (quiet) {
+    globalParams->setErrQuiet(quiet);
+  }
+
+  // open PDF file
+  // ('\001' in the first byte marks a password option that was not given,
+  // which keeps an explicitly empty password distinguishable)
+  if (ownerPassword[0] != '\001') {
+    ownerPW = new GooString(ownerPassword);
+  } else {
+    ownerPW = NULL;
+  }
+  if (userPassword[0] != '\001') {
+    userPW = new GooString(userPassword);
+  } else {
+    userPW = NULL;
+  }
+  doc = new PDFDoc(fileName, ownerPW, userPW);
+  // deleting a null pointer is a no-op, so no guards are needed
+  delete userPW;
+  delete ownerPW;
+  if (!doc->isOk()) {
+    exitCode = 1;
+    goto err1;
+  }
+
+#ifdef ENFORCE_PERMISSIONS
+  // check for print permission
+  if (!doc->okToPrint()) {
+    error(-1, "Printing this document is not allowed.");
+    exitCode = 3;
+    goto err1;
+  }
+#endif
+
+  // construct PostScript file name
+  if (argc == 3) {
+    psFileName = new GooString(argv[2]);
+  } else {
+    // Replace a trailing ".pdf" / ".PDF" with the output extension.  The
+    // last four characters are inspected only when the name has that
+    // many; for a shorter name the pointer would land before the start
+    // of the string buffer and strcmp() would read out of bounds.
+    int nameLen = fileName->getLength();
+    p = nameLen >= 4 ? fileName->getCString() + nameLen - 4 : NULL;
+    if (p && (!strcmp(p, ".pdf") || !strcmp(p, ".PDF"))) {
+      psFileName = new GooString(fileName->getCString(), nameLen - 4);
+    } else {
+      psFileName = fileName->copy();
+    }
+    psFileName->append(doEPS ? ".eps" : ".ps");
+  }
+
+  // get page range
+  if (firstPage < 1) {
+    firstPage = 1;
+  }
+  if (lastPage < 1 || lastPage > doc->getNumPages()) {
+    lastPage = doc->getNumPages();
+  }
+
+  // check for multi-page EPS or form
+  if ((doEPS || doForm) && firstPage != lastPage) {
+    error(-1, "EPS and form files can only contain one page.");
+    goto err2;
+  }
+
+  // write PostScript file
+  psOut = new PSOutputDev(psFileName->getCString(), doc->getXRef(),
+			  doc->getCatalog(), firstPage, lastPage, mode);
+  if (psOut->isOk()) {
+    doc->displayPages(psOut, firstPage, lastPage, 72, 72,
+		      0, globalParams->getPSCrop(), gFalse, gFalse);
+  } else {
+    delete psOut;
+    exitCode = 2;
+    goto err2;
+  }
+  delete psOut;
+
+  exitCode = 0;
+
+  // clean up (labels are ordered so each entry point frees only what
+  // had been created when the jump was taken)
+ err2:
+  delete psFileName;
+ err1:
+  delete doc;
+ err0:
+  delete globalParams;
+
+  // check for memory leaks
+  Object::memCheck(stderr);
+  gMemReport(stderr);
+
+  return exitCode;
+}
diff --git a/utils/pdftotext.1 b/utils/pdftotext.1
new file mode 100644
index 00000000..11a67694
--- /dev/null
+++ b/utils/pdftotext.1
@@ -0,0 +1,135 @@
+.\" Copyright 1997-2004 Glyph & Cog, LLC
+.TH pdftotext 1 "22 January 2004"
+.SH NAME
+pdftotext \- Portable Document Format (PDF) to text converter
+(version 3.00)
+.SH SYNOPSIS
+.B pdftotext
+[options]
+.RI [ PDF-file
+.RI [ text-file ]]
+.SH DESCRIPTION
+.B Pdftotext
+converts Portable Document Format (PDF) files to plain text.
+.PP
+Pdftotext reads the PDF file,
+.IR PDF-file ,
+and writes a text file,
+.IR text-file .
+If
+.I text-file
+is not specified, pdftotext converts
+.I file.pdf
+to
+.IR file.txt .
+If 
+.I text-file
+is \'-', the text is sent to stdout.
+.SH CONFIGURATION FILE
+Pdftotext reads a configuration file at startup.  It first tries to
+find the user's private config file, ~/.xpdfrc.  If that doesn't
+exist, it looks for a system-wide config file, /etc/xpdf/xpdfrc.  See the
+.BR xpdfrc (5)
+man page for details.
+.SH OPTIONS
+Many of the following options can be set with configuration file
+commands.  These are listed in square brackets with the description of
+the corresponding command line option.
+.TP
+.BI \-f " number"
+Specifies the first page to convert.
+.TP
+.BI \-l " number"
+Specifies the last page to convert.
+.TP
+.B \-layout
+Maintain (as best as possible) the original physical layout of the
+text.  The default is to \'undo' physical layout (columns,
+hyphenation, etc.) and output the text in reading order.
+.TP
+.B \-raw
+Keep the text in content stream order.  This is a hack which often
+"undoes" column formatting, etc.  Use of raw mode is no longer
+recommended.
+.TP
+.B \-htmlmeta
+Generate a simple HTML file, including the meta information.  This
+simply wraps the text in <pre> and </pre> and prepends the meta
+headers.
+.TP
+.BI \-enc " encoding-name"
+Sets the encoding to use for text output.  The
+.I encoding\-name
+must be defined with the unicodeMap command (see
+.BR xpdfrc (5)).
+The encoding name is case-sensitive.  This defaults to "Latin1" (which
+is a built-in encoding).
+.RB "[config file: " textEncoding ]
+.TP
+.BI \-eol " unix | dos | mac"
+Sets the end-of-line convention to use for text output.
+.RB "[config file: " textEOL ]
+.TP
+.B \-nopgbrk
+Don't insert page breaks (form feed characters) between pages.
+.RB "[config file: " textPageBreaks ]
+.TP
+.BI \-opw " password"
+Specify the owner password for the PDF file.  Providing this will
+bypass all security restrictions.
+.TP
+.BI \-upw " password"
+Specify the user password for the PDF file.
+.TP
+.B \-q
+Don't print any messages or errors.
+.RB "[config file: " errQuiet ]
+.TP
+.BI \-cfg " config-file"
+Read
+.I config-file
+in place of ~/.xpdfrc or the system-wide config file.
+.TP
+.B \-v
+Print copyright and version information.
+.TP
+.B \-h
+Print usage information.
+.RB ( \-help
+and
+.B \-\-help
+are equivalent.)
+.SH BUGS
+Some PDF files contain fonts whose encodings have been mangled beyond
+recognition.  There is no way (short of OCR) to extract text from
+these files.
+.SH EXIT CODES
+The Xpdf tools use the following exit codes:
+.TP
+0
+No error.
+.TP
+1
+Error opening a PDF file.
+.TP
+2
+Error opening an output file.
+.TP
+3
+Error related to PDF permissions.
+.TP
+99
+Other error.
+.SH AUTHOR
+The pdftotext software and documentation are copyright 1996-2004 Glyph
+& Cog, LLC.
+.SH "SEE ALSO"
+.BR xpdf (1),
+.BR pdftops (1),
+.BR pdfinfo (1),
+.BR pdffonts (1),
+.BR pdftoppm (1),
+.BR pdfimages (1),
+.BR xpdfrc (5)
+.br
+.B http://www.foolabs.com/xpdf/
diff --git a/utils/pdftotext.cc b/utils/pdftotext.cc
new file mode 100644
index 00000000..f8dfa80f
--- /dev/null
+++ b/utils/pdftotext.cc
@@ -0,0 +1,337 @@
+//========================================================================
+//
+// pdftotext.cc
+//
+// Copyright 1997-2003 Glyph & Cog, LLC
+//
+// Modified for Debian by Hamish Moffatt, 22 May 2002.
+//
+//========================================================================
+
+#include <poppler-config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <string.h>
+#include "parseargs.h"
+#include "goo/GooString.h"
+#include "goo/gmem.h"
+#include "GlobalParams.h"
+#include "Object.h"
+#include "Stream.h"
+#include "Array.h"
+#include "Dict.h"
+#include "XRef.h"
+#include "Catalog.h"
+#include "Page.h"
+#include "PDFDoc.h"
+#include "TextOutputDev.h"
+#include "CharTypes.h"
+#include "UnicodeMap.h"
+#include "Error.h"
+#include "config.h"
+
+static void printInfoString(FILE *f, Dict *infoDict, char *key,  // writes text1, the value of infoDict[key] mapped through uMap, then text2
+			    char *text1, char *text2, UnicodeMap *uMap);  // (writes nothing unless the entry is a string)
+static void printInfoDate(FILE *f, Dict *infoDict, char *key, char *fmt);  // fprintf(f, fmt, date) with a leading "D:" stripped from the date string
+
+static int firstPage = 1;  // -f: first page to convert; main() raises values below 1 to 1
+static int lastPage = 0;  // -l: last page to convert; main() treats values below 1 or past the end as "last page of the document"
+static GBool physLayout = gFalse;  // -layout: handed to the TextOutputDev constructor
+static GBool rawOrder = gFalse;  // -raw: handed to the TextOutputDev constructor
+static GBool htmlMeta = gFalse;  // -htmlmeta: wrap the text in an HTML skeleton with meta headers
+static char textEncName[128] = "";  // -enc: when empty, main() does not call setTextEncoding()
+static char textEOL[16] = "";  // -eol: when empty, main() does not call setTextEOL()
+static GBool noPageBreaks = gFalse;  // -nopgbrk: when set, main() calls setTextPageBreaks(gFalse)
+static char ownerPassword[33] = "\001";  // -opw: a leading \001 byte means "not given"; main() then passes NULL to PDFDoc
+static char userPassword[33] = "\001";  // -upw: same \001 "not given" sentinel as ownerPassword
+static GBool quiet = gFalse;  // -q: when set, main() calls setErrQuiet()
+static char cfgFileName[256] = "";  // -cfg: handed to the GlobalParams constructor
+static GBool printVersion = gFalse;  // -v: print the version banner only (no usage text)
+static GBool printHelp = gFalse;  // -h, -help, --help, -?: print the banner and the usage text
+
+static ArgDesc argDesc[] = {  // option table passed to parseArgs() and printUsage() in main()
+  {"-f",       argInt,      &firstPage,     0,  // entries appear to be: flag, kind, storage, storage size, help text (layout defined in parseargs.h -- confirm)
+   "first page to convert"},
+  {"-l",       argInt,      &lastPage,      0,
+   "last page to convert"},
+  {"-layout",  argFlag,     &physLayout,    0,
+   "maintain original physical layout"},
+  {"-raw",     argFlag,     &rawOrder,      0,
+   "keep strings in content stream order"},
+  {"-htmlmeta", argFlag,   &htmlMeta,       0,
+   "generate a simple HTML file, including the meta information"},
+  {"-enc",     argString,   textEncName,    sizeof(textEncName),  // argString entries pass the destination buffer size
+   "output text encoding name"},
+  {"-eol",     argString,   textEOL,        sizeof(textEOL),
+   "output end-of-line convention (unix, dos, or mac)"},
+  {"-nopgbrk", argFlag,     &noPageBreaks,  0,
+   "don't insert page breaks between pages"},
+  {"-opw",     argString,   ownerPassword,  sizeof(ownerPassword),
+   "owner password (for encrypted files)"},
+  {"-upw",     argString,   userPassword,   sizeof(userPassword),
+   "user password (for encrypted files)"},
+  {"-q",       argFlag,     &quiet,         0,
+   "don't print any messages or errors"},
+  {"-cfg",     argString,   cfgFileName,    sizeof(cfgFileName),
+   "configuration file to use in place of .xpdfrc"},
+  {"-v",       argFlag,     &printVersion,  0,
+   "print copyright and version info"},
+  {"-h",       argFlag,     &printHelp,     0,  // the four help spellings below all set the same printHelp flag
+   "print usage information"},
+  {"-help",    argFlag,     &printHelp,     0,
+   "print usage information"},
+  {"--help",   argFlag,     &printHelp,     0,
+   "print usage information"},
+  {"-?",       argFlag,     &printHelp,     0,
+   "print usage information"},
+  {NULL}  // presumably the end-of-table marker expected by parseArgs() -- confirm in parseargs.c
+};
+
+// Entry point of pdftotext: converts a page range of a PDF file to text,
+// optionally wrapped in a minimal HTML document (-htmlmeta).
+//
+// Usage: pdftotext [options] <PDF-file> [<text-file>]
+//
+// When <text-file> is omitted the output name is derived from the input
+// name: a trailing ".pdf" / ".PDF" is dropped and ".txt" (or ".html" with
+// -htmlmeta) is appended.  An output name of "-" selects stdout for the
+// HTML header and footer written here.
+//
+// Returns the process exit code:
+//    0  success
+//    1  the PDF file could not be opened
+//    2  the output file could not be opened
+//    3  copying text is not permitted (only when ENFORCE_PERMISSIONS is set)
+//   99  anything else, including bad usage and the -v / -h paths
+//
+// All locals are declared up front without initializers because the
+// goto-based cleanup below must not jump over an initialization.
+int main(int argc, char *argv[]) {
+  PDFDoc *doc;
+  GooString *fileName;
+  GooString *textFileName;
+  GooString *ownerPW, *userPW;
+  TextOutputDev *textOut;
+  FILE *f;
+  UnicodeMap *uMap;
+  Object info;
+  GBool ok;
+  char *p;
+  int exitCode;
+
+  exitCode = 99;
+
+  // parse args
+  ok = parseArgs(argDesc, &argc, argv);
+  if (!ok || argc < 2 || argc > 3 || printVersion || printHelp) {
+    fprintf(stderr, "pdftotext version %s\n", xpdfVersion);
+    fprintf(stderr, "%s\n", xpdfCopyright);
+    if (!printVersion) {
+      printUsage("pdftotext", "<PDF-file> [<text-file>]", argDesc);
+    }
+    goto err0;
+  }
+  fileName = new GooString(argv[1]);
+
+  // read config file, then let command-line options override it
+  globalParams = new GlobalParams(cfgFileName);
+  if (textEncName[0]) {
+    globalParams->setTextEncoding(textEncName);
+  }
+  if (textEOL[0]) {
+    // a bad value is reported but is not fatal
+    if (!globalParams->setTextEOL(textEOL)) {
+      fprintf(stderr, "Bad '-eol' value on command line\n");
+    }
+  }
+  if (noPageBreaks) {
+    globalParams->setTextPageBreaks(gFalse);
+  }
+  if (quiet) {
+    globalParams->setErrQuiet(quiet);
+  }
+
+  // get mapping to output encoding
+  if (!(uMap = globalParams->getTextEncoding())) {
+    error(-1, "Couldn't get text encoding");
+    // no PDFDoc exists yet, so fileName has to be released here
+    delete fileName;
+    goto err1;
+  }
+
+  // open PDF file; the \001 sentinel means the password option was not given
+  if (ownerPassword[0] != '\001') {
+    ownerPW = new GooString(ownerPassword);
+  } else {
+    ownerPW = NULL;
+  }
+  if (userPassword[0] != '\001') {
+    userPW = new GooString(userPassword);
+  } else {
+    userPW = NULL;
+  }
+  // NOTE(review): fileName is not deleted on any later path, so PDFDoc
+  // presumably takes ownership of it -- confirm against PDFDoc.h
+  doc = new PDFDoc(fileName, ownerPW, userPW);
+  if (userPW) {
+    delete userPW;
+  }
+  if (ownerPW) {
+    delete ownerPW;
+  }
+  if (!doc->isOk()) {
+    exitCode = 1;
+    goto err2;
+  }
+
+#ifdef ENFORCE_PERMISSIONS
+  // check for copy permission
+  if (!doc->okToCopy()) {
+    error(-1, "Copying of text from this document is not allowed.");
+    exitCode = 3;
+    goto err2;
+  }
+#endif
+
+  // construct text file name
+  if (argc == 3) {
+    textFileName = new GooString(argv[2]);
+  } else {
+    // Only look at the last four characters when the name actually has
+    // that many; otherwise the pointer would land before the buffer.
+    p = NULL;
+    if (fileName->getLength() >= 4) {
+      p = fileName->getCString() + fileName->getLength() - 4;
+    }
+    if (p && (!strcmp(p, ".pdf") || !strcmp(p, ".PDF"))) {
+      textFileName = new GooString(fileName->getCString(),
+                                   fileName->getLength() - 4);
+    } else {
+      textFileName = fileName->copy();
+    }
+    textFileName->append(htmlMeta ? ".html" : ".txt");
+  }
+
+  // get page range
+  if (firstPage < 1) {
+    firstPage = 1;
+  }
+  if (lastPage < 1 || lastPage > doc->getNumPages()) {
+    lastPage = doc->getNumPages();
+  }
+
+  // write HTML header
+  if (htmlMeta) {
+    if (!textFileName->cmp("-")) {
+      f = stdout;
+    } else {
+      if (!(f = fopen(textFileName->getCString(), "wb"))) {
+        error(-1, "Couldn't open text file '%s'", textFileName->getCString());
+        exitCode = 2;
+        goto err3;
+      }
+    }
+    fputs("<html>\n", f);
+    fputs("<head>\n", f);
+    doc->getDocInfo(&info);
+    if (info.isDict()) {
+      printInfoString(f, info.getDict(), "Title", "<title>", "</title>\n",
+                      uMap);
+      printInfoString(f, info.getDict(), "Subject",
+                      "<meta name=\"Subject\" content=\"", "\">\n", uMap);
+      printInfoString(f, info.getDict(), "Keywords",
+                      "<meta name=\"Keywords\" content=\"", "\">\n", uMap);
+      printInfoString(f, info.getDict(), "Author",
+                      "<meta name=\"Author\" content=\"", "\">\n", uMap);
+      printInfoString(f, info.getDict(), "Creator",
+                      "<meta name=\"Creator\" content=\"", "\">\n", uMap);
+      printInfoString(f, info.getDict(), "Producer",
+                      "<meta name=\"Producer\" content=\"", "\">\n", uMap);
+      // printInfoDate() passes the date as the single fprintf argument, so
+      // the format needs a %s or the content attribute stays empty.
+      printInfoDate(f, info.getDict(), "CreationDate",
+                    "<meta name=\"CreationDate\" content=\"%s\">\n");
+      // the info dictionary stores the modification date under "ModDate"
+      printInfoDate(f, info.getDict(), "ModDate",
+                    "<meta name=\"ModDate\" content=\"%s\">\n");
+    }
+    info.free();
+    fputs("</head>\n", f);
+    fputs("<body>\n", f);
+    fputs("<pre>\n", f);
+    // the file is closed here and handed to TextOutputDev by name below
+    if (f != stdout) {
+      fclose(f);
+    }
+  }
+
+  // write text file; htmlMeta is forwarded as the last constructor argument
+  textOut = new TextOutputDev(textFileName->getCString(),
+                              physLayout, rawOrder, htmlMeta);
+  if (textOut->isOk()) {
+    doc->displayPages(textOut, firstPage, lastPage, 72, 72, 0,
+                      gTrue, gFalse, gFalse);
+  } else {
+    delete textOut;
+    exitCode = 2;
+    goto err3;
+  }
+  delete textOut;
+
+  // write end of HTML file (reopened in append mode after the text)
+  if (htmlMeta) {
+    if (!textFileName->cmp("-")) {
+      f = stdout;
+    } else {
+      if (!(f = fopen(textFileName->getCString(), "ab"))) {
+        error(-1, "Couldn't open text file '%s'", textFileName->getCString());
+        exitCode = 2;
+        goto err3;
+      }
+    }
+    fputs("</pre>\n", f);
+    fputs("</body>\n", f);
+    fputs("</html>\n", f);
+    if (f != stdout) {
+      fclose(f);
+    }
+  }
+
+  exitCode = 0;
+
+  // clean up: each label releases what was acquired before the matching goto
+ err3:
+  delete textFileName;
+ err2:
+  delete doc;
+  uMap->decRefCnt();
+ err1:
+  delete globalParams;
+ err0:
+
+  // check for memory leaks
+  Object::memCheck(stderr);
+  gMemReport(stderr);
+
+  return exitCode;
+}
+
+// Writes one string entry of the document info dictionary to f as
+//     text1 <value> text2
+// Nothing at all is written unless infoDict[key] is a string.
+//
+//   f         output stream
+//   infoDict  document info dictionary to read from
+//   key       name of the entry to print
+//   text1     markup written before the value
+//   text2     markup written after the value
+//   uMap      converts each character to the output encoding
+//
+// A value that starts with the bytes 0xFE 0xFF is read as big-endian
+// 16-bit units (each unit is mapped on its own); any other value is read
+// one byte per character.  Because every caller embeds the value in HTML
+// markup, the characters & < > " are written as character references
+// instead of being passed through uMap.
+static void printInfoString(FILE *f, Dict *infoDict, char *key,
+                            char *text1, char *text2, UnicodeMap *uMap) {
+  Object obj;
+  GooString *s1;
+  GBool isUnicode;
+  Unicode u;
+  char buf[8];
+  int i, n, len;
+
+  if (infoDict->lookup(key, &obj)->isString()) {
+    fputs(text1, f);
+    s1 = obj.getString();
+    len = s1->getLength();
+    // only test for the byte-order mark when both of its bytes exist
+    if (len >= 2 &&
+        (s1->getChar(0) & 0xff) == 0xfe &&
+        (s1->getChar(1) & 0xff) == 0xff) {
+      isUnicode = gTrue;
+      i = 2;
+    } else {
+      isUnicode = gFalse;
+      i = 0;
+    }
+    while (i < len) {
+      if (isUnicode) {
+        if (i + 1 >= len) {
+          // odd trailing byte: there is no complete 16-bit unit left
+          break;
+        }
+        u = ((s1->getChar(i) & 0xff) << 8) |
+            (s1->getChar(i+1) & 0xff);
+        i += 2;
+      } else {
+        u = s1->getChar(i) & 0xff;
+        ++i;
+      }
+      switch (u) {
+      case '&':
+        fputs("&amp;", f);
+        break;
+      case '<':
+        fputs("&lt;", f);
+        break;
+      case '>':
+        fputs("&gt;", f);
+        break;
+      case '"':
+        fputs("&quot;", f);
+        break;
+      default:
+        n = uMap->mapUnicode(u, buf, sizeof(buf));
+        fwrite(buf, 1, n, f);
+        break;
+      }
+    }
+    fputs(text2, f);
+  }
+  obj.free();
+}
+
+// Prints the date entry `key` of the info dictionary to f.
+//
+// When infoDict[key] is a string, it is passed as the only argument to
+// fprintf(f, fmt, ...), after skipping a leading "D:" if there is one.
+// When the entry is missing or is not a string, nothing is printed.
+static void printInfoDate(FILE *f, Dict *infoDict, char *key, char *fmt) {
+  Object dateObj;
+  char *dateStr;
+
+  if (!infoDict->lookup(key, &dateObj)->isString()) {
+    dateObj.free();
+    return;
+  }
+
+  dateStr = dateObj.getString()->getCString();
+  if (strncmp(dateStr, "D:", 2) == 0) {
+    dateStr += 2;
+  }
+  fprintf(f, fmt, dateStr);
+  dateObj.free();
+}
author	Kristian Høgsberg <krh@redhat.com>	2005-12-12 20:15:11 +0000
committer	Kristian Høgsberg <krh@redhat.com>	2005-12-12 20:15:11 +0000
commit	bcc5e3afe27c8787ce7022a0701997c96eddb4fe (patch)
tree	31c1727f926945dd49ef3d8dd56b9f6f0ef07618
parent	5fbded32741acb5fac411189f80cb57aa11df517 (diff)