summaryrefslogtreecommitdiff
path: root/utils/HtmlFonts.cc
blob: ad67c5db515181fbbb67c2e04723a78187082c70 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
//========================================================================
//
// This file comes from pdftohtml project
// http://pdftohtml.sourceforge.net
//
// Copyright from:
// Gueorgui Ovtcharov
// Rainer Dorsch <http://www.ra.informatik.uni-stuttgart.de/~rainer/>
// Mikhail Kruk <meshko@cs.brandeis.edu>
//
//========================================================================

//========================================================================
//
// Modified under the Poppler project - http://poppler.freedesktop.org
//
// All changes made under the Poppler project to this file are licensed
// under GPL version 2 or later
//
// Copyright (C) 2007, 2010, 2012, 2018 Albert Astals Cid <aacid@kde.org>
// Copyright (C) 2008 Boris Toloknov <tlknv@yandex.ru>
// Copyright (C) 2008 Tomas Are Haavet <tomasare@gmail.com>
// Copyright (C) 2010 OSSD CDAC Mumbai by Leena Chourey (leenac@cdacmumbai.in) and Onkar Potdar (onkar@cdacmumbai.in)
// Copyright (C) 2011 Joshua Richardson <jric@chegg.com>
// Copyright (C) 2011 Stephen Reichling <sreichling@chegg.com>
// Copyright (C) 2012 Igor Slepchin <igor.slepchin@gmail.com>
// Copyright (C) 2012 Luis Parravicini <lparravi@gmail.com>
// Copyright (C) 2013 Julien Nabet <serval2412@yahoo.fr>
// Copyright (C) 2017 Jason Crain <jason@inspiresomeone.us>
// Copyright (C) 2018 Klarälvdalens Datakonsult AB, a KDAB Group company, <info@kdab.com>. Work sponsored by the LiMux project of the city of Munich
// Copyright (C) 2018 Steven Boswell <ulatekh@yahoo.com>
// Copyright (C) 2018 Adam Reichold <adam.reichold@t-online.de>
// Copyright (C) 2019 Oliver Sander <oliver.sander@tu-dresden.de>
//
// To see a description of the changes please see the Changelog file that
// came with your tarball or type make ChangeLog if you are building from git
//
//========================================================================

#include "HtmlFonts.h"
#include "HtmlUtils.h"
#include "GlobalParams.h"
#include "UnicodeMap.h"
#include "GfxFont.h"
#include <cstdio>

namespace
{

// Family name used when a font does not report a name of its own.
const char* const defaultFamilyName = "Times";

// Style markers that may follow the family part of a font name.
// removeStyleSuffix() probes them in exactly this order, so the order
// is part of the behaviour.
const char* const styleSuffixes[] = {
  "-Regular",
  "-Bold",
  "-BoldOblique",
  "-BoldItalic",
  "-Oblique",
  "-Italic",
  "-Roman",
};

// Cuts familyName at the last occurrence of the first marker from
// styleSuffixes that occurs anywhere in it; everything from that
// position on is dropped.  Names without any marker are left untouched.
void removeStyleSuffix(std::string& familyName) {
  const unsigned int suffixCount = sizeof(styleSuffixes) / sizeof(styleSuffixes[0]);

  for (unsigned int idx = 0; idx < suffixCount; ++idx) {
    const std::string::size_type cut = familyName.rfind(styleSuffixes[idx]);
    if (cut == std::string::npos) {
      continue;
    }
    familyName.erase(cut);
    break;
  }
}

}

// Rounds x by adding 0.5 and truncating through an int cast.
// NOTE(review): the argument is not parenthesised inside the expansion and
// truncation only rounds to nearest for non-negative values.
#define xoutRound(x) ((int)(x + 0.5))
// Output flags defined outside this file.
// xml: when set, CSStyle() emits a <fontspec> element instead of a CSS rule
// and HtmlFilter() never emits "&#160;" for whitespace.
extern bool xml;
// fontFullName: when set, CSStyle() uses the full font name rather than the
// family name.
extern bool fontFullName;

// Builds a colour from a GfxRGB triple by scaling every component with
// value / 65535.0 * 255.0 and truncating to int.  If any scaled component
// is rejected by Ok(), the colour is reset to (0,0,0); the problem is
// reported on stderr unless error output is quieted.
HtmlFontColor::HtmlFontColor(GfxRGB rgb){
  r = static_cast<int>(rgb.r / 65535.0 * 255.0);
  g = static_cast<int>(rgb.g / 65535.0 * 255.0);
  b = static_cast<int>(rgb.b / 65535.0 * 255.0);

  const bool allValid = Ok(r) && Ok(b) && Ok(g);
  if (allValid) {
    return;
  }

  if (!globalParams->getErrQuiet()) {
    fprintf(stderr, "Error : Bad color (%d,%d,%d) reset to (0,0,0)\n", r, g, b);
  }
  r = 0;
  g = 0;
  b = 0;
}

// Returns a newly allocated two-character lowercase hexadecimal rendering
// of xcol (high nibble first).  The caller owns the returned string.
GooString *HtmlFontColor::convtoX(unsigned int xcol) const{
  GooString *hex = new GooString();

  const unsigned int nibbles[2] = { xcol / 16, xcol % 16 };
  for (const unsigned int nibble : nibbles) {
    const char digit = (nibble < 10) ? static_cast<char>('0' + nibble)
                                     : static_cast<char>('a' + nibble - 10);
    hex->append(digit);
  }

  return hex;
}

// Returns a newly allocated "#rrggbb" string for this colour.
// The caller owns the returned string.
GooString *HtmlFontColor::toString() const{
  GooString *result = new GooString("#");

  // Append one channel as two hex digits, releasing the temporary.
  auto appendChannel = [this, result](unsigned int channel) {
    GooString *hex = convtoX(channel);
    result->append(hex);
    delete hex;
  };

  appendChannel(r);
  appendChannel(g);
  appendChannel(b);

  return result;
}

// Builds the HTML description of a PDF font.
//   font  - source font; queried for its bold/italic flags, weight and name
//   _size - font size; the stored size is _size - 1
//   rgb   - text colour
// Bold and italic are taken from the font flags (or a weight of at least
// W700) and, failing that, from the words "bold", "italic" or "oblique"
// in the lower-cased font name.  Fonts without a name fall back to
// defaultFamilyName.  The font starts out neither rotated nor skewed.
HtmlFont::HtmlFont(GfxFont *font, int _size, GfxRGB rgb){
  color = HtmlFontColor(rgb);
  size = _size - 1;
  lineSize = -1;

  rotOrSkewed = false;
  rotSkewMat[0] = rotSkewMat[1] = rotSkewMat[2] = rotSkewMat[3] = 0;

  bold = font->isBold() || font->getWeight() >= GfxFont::W700;
  italic = font->isItalic();

  const GooString *fontname = font->getName();
  if (!fontname) {
    // Unnamed font: use the default for both the full and the family name.
    FontName = new GooString(defaultFamilyName);
    familyName = defaultFamilyName;
    return;
  }

  FontName = new GooString(fontname);

  // Case-insensitive search for style hints inside the font name.
  GooString lowered(fontname);
  lowered.lowerCase();
  const char *haystack = lowered.c_str();

  if (!bold) {
    bold = strstr(haystack, "bold") != nullptr;
  }
  if (!italic) {
    italic = strstr(haystack, "italic") != nullptr ||
             strstr(haystack, "oblique") != nullptr;
  }

  familyName = fontname->c_str();
  removeStyleSuffix(familyName);
}
 
// Copy constructor: duplicates every attribute of x, including a deep copy
// of the full font name and the rotation/skew matrix.
HtmlFont::HtmlFont(const HtmlFont& x){
   // Owned string: allocate an independent copy.
   FontName = new GooString(x.FontName);
   familyName = x.familyName;

   // Plain attributes.
   color = x.color;
   size = x.size;
   lineSize = x.lineSize;
   bold = x.bold;
   italic = x.italic;

   // Rotation / skew state.
   rotOrSkewed = x.rotOrSkewed;
   memcpy(rotSkewMat, x.rotSkewMat, sizeof(rotSkewMat));
}


// Releases the full font name string allocated by the constructors and
// the assignment operator.
HtmlFont::~HtmlFont(){
  delete FontName;
}

// Copy assignment: makes *this an independent copy of x and returns *this.
// Self-assignment is a no-op.  The rotation/skew flag and matrix are copied
// as well, matching the copy constructor; otherwise an assigned font would
// keep its previous rotation state.
HtmlFont& HtmlFont::operator=(const HtmlFont& x){
   if (this==&x) return *this;

   // Allocate the replacement name before releasing the old one so that a
   // failed allocation leaves *this unchanged.
   GooString *newFontName = new GooString(x.FontName);
   delete FontName;
   FontName = newFontName;

   size=x.size;
   lineSize=x.lineSize;
   italic=x.italic;
   bold=x.bold;
   familyName=x.familyName;
   color=x.color;
   rotOrSkewed = x.rotOrSkewed;
   memcpy(rotSkewMat, x.rotSkewMat, sizeof(rotSkewMat));
   return *this;
}

/*
  Strict comparison, used to identify a font uniquely when it is inserted
  into the list of all encountered fonts.  Size, line size, full name,
  bold, italic, colour and the rotated/skewed flag must all match; the
  rotation matrices are compared only when the font is rotated or skewed.
*/
bool HtmlFont::isEqual(const HtmlFont& x) const{
  if (size != x.size) return false;
  if (lineSize != x.lineSize) return false;
  if (FontName->cmp(x.FontName) != 0) return false;
  if (bold != x.bold) return false;
  if (italic != x.italic) return false;
  if (!color.isEqual(x.getColor())) return false;
  if (isRotOrSkewed() != x.isRotOrSkewed()) return false;

  if (!isRotOrSkewed()) {
    return true;
  }
  return rot_matrices_equal(getRotMat(), x.getRotMat());
}

/*
  Loose comparison, used to decide whether two pieces of text can be
  joined together.  Bold/italic are deliberately ignored: only size,
  family name and colour are compared.
*/
bool HtmlFont::isEqualIgnoreBold(const HtmlFont& x) const{
  if (size != x.size) {
    return false;
  }
  if (!(familyName == x.familyName)) {
    return false;
  }
  return color.isEqual(x.getColor());
}

// Returns a newly allocated copy of the family name (the font name with
// its style suffix removed).  The caller owns the returned string.
GooString* HtmlFont::getFontName(){
  return new GooString(familyName);
}

// Returns a newly allocated copy of the full font name.
// The caller owns the returned string.
GooString* HtmlFont::getFullName(){
  return new GooString(FontName);
}

// TODO (original author's note): this method is plain wrong.
//
// Converts uLen Unicode code points from u into a newly allocated string
// in the configured text encoding, escaping characters that are special
// in HTML.  The caller owns the returned string.  If no text encoding is
// available an empty string is returned.
GooString* HtmlFont::HtmlFilter(const Unicode* u, int uLen) {
  GooString *out = new GooString();

  // get the output encoding
  UnicodeMap *encoding = globalParams->getTextEncoding();
  if (!encoding) {
    return out;
  }

  char encoded[8];
  for (int idx = 0; idx < uLen; ++idx) {
    const Unicode ch = u[idx];

    // skip control characters.  W3C disallows them and they cause a warning
    // with PHP.
    if (ch <= 31 && ch != '\t') {
      continue;
    }

    if (ch == '"') {
      out->append("&#34;");
    } else if (ch == '&') {
      out->append("&amp;");
    } else if (ch == '<') {
      out->append("&lt;");
    } else if (ch == '>') {
      out->append("&gt;");
    } else if (ch == ' ' || ch == '\t') {
      // Outside XML mode a non-breaking space is written for whitespace
      // that is last in the input, first in the output, or follows a
      // plain space in the output.
      const bool useNbsp = !xml &&
          (idx + 1 >= uLen ||
           !out->getLength() ||
           out->getChar(out->getLength() - 1) == ' ');
      out->append(useNbsp ? "&#160;" : " ");
    } else {
      // convert unicode to string
      const int n = encoding->mapUnicode(ch, encoded, sizeof(encoded));
      if (n > 0) {
        out->append(encoded, n);
      }
    }
  }

  encoding->decRefCnt();
  return out;
}

// Creates an accumulator with an empty, heap-allocated font list; the list
// is released by the destructor.
HtmlFontAccu::HtmlFontAccu(){
  accu=new std::vector<HtmlFont>();
}

// Releases the font list.  Deleting a null pointer is a no-op, so no
// explicit null check is needed.
HtmlFontAccu::~HtmlFontAccu(){
  delete accu;
}

int HtmlFontAccu::AddFont(const HtmlFont& font){
 std::vector<HtmlFont>::iterator i; 
 for (i=accu->begin();i!=accu->end();++i)
 {
	if (font.isEqual(*i)) 
	{
		return (int)(i-(accu->begin()));
	}
 }

 accu->push_back(font);
 return (accu->size()-1);
}

// Builds the font definition for font #i and returns it as a newly
// allocated string owned by the caller.
//   i - index of the font in the accumulated list
//   j - number written between ".ft" and i in the CSS class name
// In XML mode a <fontspec .../> element is produced; otherwise a CSS rule
// named ".ft<j><i>" that also carries the transform matrix when the font
// is rotated or skewed.
GooString* HtmlFontAccu::CSStyle(int i, int j){
   GooString *out = new GooString();

   HtmlFont font = (*accu)[i];
   GooString *colorStr = font.getColor().toString();
   GooString *fontName = fontFullName ? font.getFullName() : font.getFontName();

   if (xml) {
     out->append("<fontspec id=\"");
     out->append(std::to_string(i));
     out->append("\" size=\"");
     out->append(std::to_string(font.getSize()));
     out->append("\" family=\"");
     out->append(fontName);
     out->append("\" color=\"");
     out->append(colorStr);
     out->append("\"/>");
   } else {
     out->append(".ft");
     out->append(std::to_string(j));
     out->append(std::to_string(i));
     out->append("{font-size:");
     out->append(std::to_string(font.getSize()));

     // A line size of -1 or 0 means no line-height is written.
     const auto lineHeight = font.getLineSize();
     if (lineHeight != -1 && lineHeight != 0) {
       out->append("px;line-height:");
       out->append(std::to_string(lineHeight));
     }

     out->append("px;font-family:");
     out->append(fontName);
     out->append(";color:");
     out->append(colorStr);

     // if there is rotation or skew, include the matrix
     if (font.isRotOrSkewed()) {
       static const char* const transformProps[] = {
         ";-moz-transform:",
         ";-webkit-transform:",
         ";-o-transform:",
         ";-ms-transform:",
       };
       // Todo: 75% is a wild guess that seems to work pretty well;
       // We probably need to calculate the real percentage
       // Based on the characteristic baseline and bounding box of current font
       // PDF origin is at baseline
       static const char* const originProps[] = {
         ";-moz-transform-origin: left 75%",
         ";-webkit-transform-origin: left 75%",
         ";-o-transform-origin: left 75%",
         ";-ms-transform-origin: left 75%",
       };

       const double * const mat = font.getRotMat();
       GooString matrixStr(" matrix(");
       matrixStr.appendf("{0:10.10g}, {1:10.10g}, {2:10.10g}, {3:10.10g}, 0, 0)",
                         mat[0], mat[1], mat[2], mat[3]);

       for (const char* const prop : transformProps) {
         out->append(prop);
         out->append(&matrixStr);
       }
       for (const char* const prop : originProps) {
         out->append(prop);
       }
     }
     out->append(";}");
   }

   delete fontName;
   delete colorStr;
   return out;
}