diff options
author | Ashod Nakashian <ashod.nakashian@collabora.co.uk> | 2018-04-08 23:38:55 -0400 |
---|---|---|
committer | Jan Holesovsky <kendy@collabora.com> | 2018-06-01 08:59:14 +0200 |
commit | 1cd2306527e44186eee78147e522e4adab38f862 (patch) | |
tree | 57ee6b2f47a9e7c6353eedf5f1f91dafd6d1a57b /external | |
parent | fffc15fc36ac9c6b4a609c3d5a7322573d1e3d29 (diff) |
svx: more accurate PDF text importing
Change-Id: If37119510cbc091dc86cb5f699984186167745c7
Diffstat (limited to 'external')
-rw-r--r-- | external/pdfium/edit.patch.1 | 136 |
1 files changed, 131 insertions, 5 deletions
diff --git a/external/pdfium/edit.patch.1 b/external/pdfium/edit.patch.1 index 78cf4c3394c6..b7cd86e5ff2d 100644 --- a/external/pdfium/edit.patch.1 +++ b/external/pdfium/edit.patch.1 @@ -35,10 +35,17 @@ index 0a01ae0..fad2920 100644 DrawTextPathWithPattern(textobj, pObj2Device, pFont, font_size, &text_matrix, bFill, bStroke); diff --git a/fpdfsdk/fpdfeditpage.cpp b/fpdfsdk/fpdfeditpage.cpp -index ca2cf3f..ef4b958 100644 +index ca2cf3f..ac36788 100644 --- a/fpdfsdk/fpdfeditpage.cpp +++ b/fpdfsdk/fpdfeditpage.cpp -@@ -17,6 +17,7 @@ +@@ -11,12 +11,14 @@ + #include <utility> + + #include "core/fpdfapi/edit/cpdf_pagecontentgenerator.h" ++#include "core/fpdfapi/font/cpdf_font.h" + #include "core/fpdfapi/page/cpdf_form.h" + #include "core/fpdfapi/page/cpdf_formobject.h" + #include "core/fpdfapi/page/cpdf_imageobject.h" #include "core/fpdfapi/page/cpdf_page.h" #include "core/fpdfapi/page/cpdf_pageobject.h" #include "core/fpdfapi/page/cpdf_pathobject.h" @@ -46,11 +53,31 @@ index ca2cf3f..ef4b958 100644 #include "core/fpdfapi/page/cpdf_shadingobject.h" #include "core/fpdfapi/parser/cpdf_array.h" #include "core/fpdfapi/parser/cpdf_document.h" -@@ -363,3 +364,20 @@ FPDFPageObj_GetBounds(FPDF_PAGEOBJECT pageObject, +@@ -363,3 +365,103 @@ FPDFPageObj_GetBounds(FPDF_PAGEOBJECT pageObject, *top = bbox.top; return true; } + ++FPDF_EXPORT int FPDF_CALLCONV ++FPDFTextObj_CountChars(FPDF_PAGEOBJECT text_object) ++{ ++ if (!text_object) ++ return 0; ++ ++ CPDF_TextObject* pTxtObj = static_cast<CPDF_TextObject*>(text_object); ++ return pTxtObj->CountChars(); ++} ++ ++FPDF_EXPORT int FPDF_CALLCONV ++FPDFTextObj_GetFontSize(FPDF_PAGEOBJECT text_object) ++{ ++ if (!text_object) ++ return 0; ++ ++ CPDF_TextObject* pTxtObj = static_cast<CPDF_TextObject*>(text_object); ++ return pTxtObj->GetFontSize(); ++} ++ +FPDF_EXPORT void FPDF_CALLCONV +FPDFTextObj_GetMatrix(FPDF_PAGEOBJECT text_object, + double* a, @@ -67,6 +94,69 @@ index ca2cf3f..ef4b958 100644 + *c = matrix.c; + *d = matrix.d; +} ++ 
++FPDF_EXPORT int FPDF_CALLCONV ++FPDFTextObj_GetUnicode(FPDF_PAGEOBJECT text_object, int index) ++{ ++ if (!text_object || index < 0) ++ return 0; ++ ++ CPDF_TextObject* pTxtObj = static_cast<CPDF_TextObject*>(text_object); ++ if (index > pTxtObj->CountChars()) ++ return 0; ++ ++ CPDF_TextObjectItem info; ++ pTxtObj->GetCharInfo(index, &info); ++ return info.m_CharCode; ++} ++ ++FPDF_EXPORT int FPDF_CALLCONV FPDFTextObj_GetText(FPDF_PAGEOBJECT text_object, ++ int char_start, ++ int char_count, ++ unsigned short* result) { ++ if (!text_object || char_start < 0 || char_count < 0 || !result) ++ return 0; ++ ++ CPDF_TextObject* pTxtObj = static_cast<CPDF_TextObject*>(text_object); ++ int char_available = pTxtObj->CountChars() - char_start; ++ if (char_available <= 0) ++ return 0; ++ ++ char_count = std::min(char_count, char_available); ++ if (char_count == 0) { ++ // Writing out "", which has a character count of 1 due to the NUL. ++ *result = '\0'; ++ return 1; ++ } ++ ++ CPDF_Font* pFont = pTxtObj->GetFont(); ++ WideString str; ++ for (uint32_t charcode : pTxtObj->GetCharCodes()) { ++ if (charcode != CPDF_Font::kInvalidCharCode) ++ str += pFont->UnicodeFromCharCode(charcode); ++ } ++ ++// CFX_WideTextBuf m_TextBuf; ++// WideString str = textpage->GetPageText(char_start, char_count); ++// return WideString(m_TextBuf.AsStringView().Mid( ++// static_cast<size_t>(text_start), static_cast<size_t>(text_count))); ++ ++// if (str.GetLength() > static_cast<size_t>(char_count)) ++// str = str.Left(static_cast<size_t>(char_count)); ++ ++ // Re-encode in UTF-16. ++// WideString str = text.UTF8Decode(); ++ ++ // UTF16LE_Encode doesn't handle surrogate pairs properly, so the number ++ // of items is expected to stay the same. ++ ByteString byte_str = str.UTF16LE_Encode(); ++ size_t byte_str_len = byte_str.GetLength(); ++ int ret_count = byte_str_len / sizeof(unsigned short); ++ ++ ASSERT(ret_count <= char_count + 1); // +1 to account for the NUL terminator. 
++ memcpy(result, byte_str.GetBuffer(byte_str_len), byte_str_len); ++ return ret_count; ++} diff --git a/fpdfsdk/fpdftext.cpp b/fpdfsdk/fpdftext.cpp index 68bf4f8..e073b20 100644 --- a/fpdfsdk/fpdftext.cpp @@ -101,13 +191,33 @@ index 68bf4f8..e073b20 100644 int index, double* left, diff --git a/public/fpdf_edit.h b/public/fpdf_edit.h -index 54735a3..3642a2a 100644 +index 54735a3..a9c1a25 100644 --- a/public/fpdf_edit.h +++ b/public/fpdf_edit.h -@@ -761,6 +761,21 @@ FPDFPageObj_CreateTextObj(FPDF_DOCUMENT document, +@@ -761,6 +761,57 @@ FPDFPageObj_CreateTextObj(FPDF_DOCUMENT document, FPDF_FONT font, float font_size); ++// Get the number of characters from a text object. ++// ++// text_object - Handle of text object returned by FPDFPageObj_NewTextObj ++// or FPDFPageObj_NewTextObjEx. ++// Return Value: ++// A character count in the text object. ++FPDF_EXPORT int FPDF_CALLCONV ++FPDFTextObj_CountChars(FPDF_PAGEOBJECT text_object); ++ ++ ++// Get the font size of a text object. ++// ++// text_object - Handle of text object returned by FPDFPageObj_NewTextObj ++// or FPDFPageObj_NewTextObjEx. ++// ++// Return Value: ++// The value of the font size ++FPDF_EXPORT int FPDF_CALLCONV ++FPDFTextObj_GetFontSize(FPDF_PAGEOBJECT text_object); ++ +// Get the matrix of a particular text object. +// +// text_object - Handle of text object returned by FPDFPageObj_NewTextObj @@ -123,6 +233,22 @@ index 54735a3..3642a2a 100644 + double* c, + double* d); + ++// Get the Unicode of a particular character in a text object. ++// ++// text_object - Handle of text object returned by FPDFPageObj_NewTextObj ++// or FPDFPageObj_NewTextObjEx. ++// index - The index of the character to get the Unicode. ++// Return Value: ++// The Unicode value. 
++FPDF_EXPORT int FPDF_CALLCONV ++FPDFTextObj_GetUnicode(FPDF_PAGEOBJECT text_object, int index); ++ ++FPDF_EXPORT int FPDF_CALLCONV ++FPDFTextObj_GetText(FPDF_PAGEOBJECT text_object, ++ int char_start, ++ int char_count, ++ unsigned short* result); ++ #ifdef __cplusplus } // extern "C" #endif // __cplusplus |