summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Sune Vuorela <sune@vuorela.dk> 2024-12-19 13:34:01 +0100
committer Albert Astals Cid <aacid@kde.org> 2024-12-20 17:02:06 +0000
commit d2609165be8815cfa613f71df7369c59a41f7a18 (patch)
tree e4104b5c2bc46babad3a92e1f115948ca32642f2
parent a3fd179de9002c26da5cac62ca728894808e6513 (diff)
TextOutputDev: memory cleanups
Return some GooStrings by value, and convert a std::vector** to a vector of vectors
-rw-r--r-- glib/poppler-page.cc 73
-rw-r--r-- poppler/TextOutputDev.cc 108
-rw-r--r-- poppler/TextOutputDev.h 10
-rw-r--r-- qt5/src/poppler-page.cc 5
-rw-r--r-- qt6/src/poppler-page.cc 5
-rw-r--r-- test/perf-test.cc 7
6 files changed, 74 insertions, 134 deletions
diff --git a/glib/poppler-page.cc b/glib/poppler-page.cc
index 38e5e680..29871887 100644
--- a/glib/poppler-page.cc
+++ b/glib/poppler-page.cc
@@ -726,7 +726,6 @@ cairo_region_t *poppler_page_get_selected_region(PopplerPage *page, gdouble scal
**/
char *poppler_page_get_selected_text(PopplerPage *page, PopplerSelectionStyle style, PopplerRectangle *selection)
{
- GooString *sel_text;
char *result;
TextPage *text;
SelectionStyle selection_style = selectionStyleGlyph;
@@ -753,9 +752,8 @@ char *poppler_page_get_selected_text(PopplerPage *page, PopplerSelectionStyle st
}
text = poppler_page_get_text_page(page);
- sel_text = text->getSelectionText(&pdf_selection, selection_style);
- result = g_strdup(sel_text->c_str());
- delete sel_text;
+ GooString sel_text = text->getSelectionText(&pdf_selection, selection_style);
+ result = g_strdup(sel_text.c_str());
return result;
}
@@ -2327,12 +2325,10 @@ gboolean poppler_page_get_text_layout_for_area(PopplerPage *page, PopplerRectang
TextPage *text;
PopplerRectangle *rect;
PDFRectangle selection;
- int i, k;
guint offset = 0;
guint n_rects = 0;
gdouble x1, y1, x2, y2;
gdouble x3, y3, x4, y4;
- int n_lines;
g_return_val_if_fail(POPPLER_IS_PAGE(page), FALSE);
g_return_val_if_fail(area != nullptr, FALSE);
@@ -2345,19 +2341,18 @@ gboolean poppler_page_get_text_layout_for_area(PopplerPage *page, PopplerRectang
selection.y2 = area->y2;
text = poppler_page_get_text_page(page);
- std::vector<TextWordSelection *> **word_list = text->getSelectionWords(&selection, selectionStyleGlyph, &n_lines);
- if (!word_list) {
+ std::vector<std::vector<std::unique_ptr<TextWordSelection>>> word_list = text->getSelectionWords(&selection, selectionStyleGlyph);
+ if (word_list.empty()) {
return FALSE;
}
- n_rects += n_lines - 1;
- for (i = 0; i < n_lines; i++) {
- std::vector<TextWordSelection *> *line_words = word_list[i];
- n_rects += line_words->size() - 1;
- for (std::size_t j = 0; j < line_words->size(); j++) {
- const TextWordSelection *word_sel = (*line_words)[j];
+ n_rects += word_list.size() - 1;
+ for (const std::vector<std::unique_ptr<TextWordSelection>> &line_words : word_list) {
+ n_rects += line_words.size() - 1;
+ for (std::size_t j = 0; j < line_words.size(); j++) {
+ const TextWordSelection *word_sel = line_words[j].get();
n_rects += word_sel->getEnd() - word_sel->getBegin();
- if (!word_sel->getWord()->hasSpaceAfter() && j < line_words->size() - 1) {
+ if (!word_sel->getWord()->hasSpaceAfter() && j < line_words.size() - 1) {
n_rects--;
}
}
@@ -2366,14 +2361,14 @@ gboolean poppler_page_get_text_layout_for_area(PopplerPage *page, PopplerRectang
*rectangles = g_new(PopplerRectangle, n_rects);
*n_rectangles = n_rects;
- for (i = 0; i < n_lines; i++) {
- std::vector<TextWordSelection *> *line_words = word_list[i];
- for (std::size_t j = 0; j < line_words->size(); j++) {
- TextWordSelection *word_sel = (*line_words)[j];
+ for (size_t i = 0; i < word_list.size(); i++) {
+ std::vector<std::unique_ptr<TextWordSelection>> &line_words = word_list[i];
+ for (std::size_t j = 0; j < line_words.size(); j++) {
+ TextWordSelection *word_sel = line_words[j].get();
const TextWord *word = word_sel->getWord();
int end = word_sel->getEnd();
- for (k = word_sel->getBegin(); k < end; k++) {
+ for (int k = word_sel->getBegin(); k < end; k++) {
rect = *rectangles + offset;
word->getCharBBox(k, &(rect->x1), &(rect->y1), &(rect->x2), &(rect->y2));
offset++;
@@ -2382,8 +2377,8 @@ gboolean poppler_page_get_text_layout_for_area(PopplerPage *page, PopplerRectang
rect = *rectangles + offset;
word->getBBox(&x1, &y1, &x2, &y2);
- if (word->hasSpaceAfter() && j < line_words->size() - 1) {
- TextWordSelection *next_word_sel = (*line_words)[j + 1];
+ if (word->hasSpaceAfter() && j < line_words.size() - 1) {
+ TextWordSelection *next_word_sel = line_words[j + 1].get();
next_word_sel->getWord()->getBBox(&x3, &y3, &x4, &y4);
// space is from one word to other and with the same height as
@@ -2394,11 +2389,9 @@ gboolean poppler_page_get_text_layout_for_area(PopplerPage *page, PopplerRectang
rect->y2 = y2;
offset++;
}
-
- delete word_sel;
}
- if (i < n_lines - 1 && offset > 0) {
+ if (i < word_list.size() - 1 && offset > 0) {
// end of line
rect->x1 = x2;
rect->y1 = y2;
@@ -2406,12 +2399,7 @@ gboolean poppler_page_get_text_layout_for_area(PopplerPage *page, PopplerRectang
rect->y2 = y2;
offset++;
}
-
- delete line_words;
}
-
- gfree(static_cast<void *>(word_list));
-
return TRUE;
}
@@ -2503,11 +2491,9 @@ GList *poppler_page_get_text_attributes_for_area(PopplerPage *page, PopplerRecta
{
TextPage *text;
PDFRectangle selection;
- int n_lines;
PopplerTextAttributes *attrs = nullptr;
const TextWord *word, *prev_word = nullptr;
gint word_i, prev_word_i;
- gint i;
gint offset = 0;
GList *attributes = nullptr;
@@ -2520,15 +2506,15 @@ GList *poppler_page_get_text_attributes_for_area(PopplerPage *page, PopplerRecta
selection.y2 = area->y2;
text = poppler_page_get_text_page(page);
- std::vector<TextWordSelection *> **word_list = text->getSelectionWords(&selection, selectionStyleGlyph, &n_lines);
- if (!word_list) {
+ std::vector<std::vector<std::unique_ptr<TextWordSelection>>> word_list = text->getSelectionWords(&selection, selectionStyleGlyph);
+ if (word_list.empty()) {
return nullptr;
}
- for (i = 0; i < n_lines; i++) {
- std::vector<TextWordSelection *> *line_words = word_list[i];
- for (std::size_t j = 0; j < line_words->size(); j++) {
- TextWordSelection *word_sel = (*line_words)[j];
+ for (size_t i = 0; i < word_list.size(); i++) {
+ std::vector<std::unique_ptr<TextWordSelection>> &line_words = word_list[i];
+ for (std::size_t j = 0; j < line_words.size(); j++) {
+ TextWordSelection *word_sel = line_words[j].get();
int end = word_sel->getEnd();
word = word_sel->getWord();
@@ -2545,23 +2531,16 @@ GList *poppler_page_get_text_attributes_for_area(PopplerPage *page, PopplerRecta
prev_word_i = word_i;
}
- if (word->hasSpaceAfter() && j < line_words->size() - 1) {
+ if (word->hasSpaceAfter() && j < line_words.size() - 1) {
attrs->end_index = offset;
offset++;
}
-
- delete word_sel;
}
- if (i < n_lines - 1) {
+ if (i < word_list.size() - 1) {
attrs->end_index = offset;
offset++;
}
-
- delete line_words;
}
-
- gfree(static_cast<void *>(word_list));
-
return g_list_reverse(attributes);
}
diff --git a/poppler/TextOutputDev.cc b/poppler/TextOutputDev.cc
index 7a817ee8..1a53b5e2 100644
--- a/poppler/TextOutputDev.cc
+++ b/poppler/TextOutputDev.cc
@@ -4284,9 +4284,8 @@ bool TextPage::findText(const Unicode *s, int len, bool startAtTop, bool stopAtB
return false;
}
-GooString *TextPage::getText(double xMin, double yMin, double xMax, double yMax, EndOfLineKind textEOL) const
+GooString TextPage::getText(double xMin, double yMin, double xMax, double yMax, EndOfLineKind textEOL) const
{
- GooString *s;
const UnicodeMap *uMap;
TextBlock *blk;
TextLine *line;
@@ -4300,7 +4299,7 @@ GooString *TextPage::getText(double xMin, double yMin, double xMax, double yMax,
int col, idx0, idx1, i, j;
bool multiLine, oneRot;
- s = new GooString();
+ GooString s;
// get the output encoding
if (!(uMap = globalParams->getTextEncoding())) {
@@ -4318,7 +4317,7 @@ GooString *TextPage::getText(double xMin, double yMin, double xMax, double yMax,
word->getCharBBox(j, &gXMin, &gYMin, &gXMax, &gYMax);
if (xMin <= gXMin && gXMax <= xMax && yMin <= gYMin && gYMax <= yMax) {
mbc_len = uMap->mapUnicode(*(word->getChar(j)), mbc, sizeof(mbc));
- s->append(mbc, mbc_len);
+ s.append(mbc, mbc_len);
}
}
}
@@ -4489,22 +4488,22 @@ GooString *TextPage::getText(double xMin, double yMin, double xMax, double yMax,
// insert a return
if (frag->col < col || (i > 0 && fabs(frag->base - frags[i - 1].base) > maxIntraLineDelta * frags[i - 1].line->words->fontSize)) {
- s->append(eol, eolLen);
+ s.append(eol, eolLen);
col = 0;
multiLine = true;
}
// column alignment
for (; col < frag->col; ++col) {
- s->append(space, spaceLen);
+ s.append(space, spaceLen);
}
// get the fragment text
- col += dumpFragment(frag->line->text + frag->start, frag->len, uMap, s);
+ col += dumpFragment(frag->line->text + frag->start, frag->len, uMap, &s);
}
if (multiLine) {
- s->append(eol, eolLen);
+ s.append(eol, eolLen);
}
}
@@ -4543,66 +4542,44 @@ public:
void visitWord(TextWord *word, int begin, int end, const PDFRectangle *selection) override;
void endPage();
- GooString *getText();
- std::vector<TextWordSelection *> **takeWordList(int *nLines);
+ GooString getText();
+ std::vector<std::vector<std::unique_ptr<TextWordSelection>>> takeWordList();
private:
void startLine();
void finishLine();
- std::vector<TextWordSelection *> **lines;
- int nLines, linesSize;
- std::vector<TextWordSelection *> *words;
+ std::vector<std::vector<std::unique_ptr<TextWordSelection>>> lines;
+ std::vector<std::unique_ptr<TextWordSelection>> words;
int tableId;
TextBlock *currentBlock;
};
TextSelectionDumper::TextSelectionDumper(TextPage *p) : TextSelectionVisitor(p)
{
- linesSize = 256;
- lines = (std::vector<TextWordSelection *> **)gmallocn(linesSize, sizeof(std::vector<TextWordSelection *> *));
- nLines = 0;
-
tableId = -1;
currentBlock = nullptr;
- words = nullptr;
}
-TextSelectionDumper::~TextSelectionDumper()
-{
- for (int i = 0; i < nLines; i++) {
- for (auto entry : *(lines[i])) {
- delete entry;
- }
- delete lines[i];
- }
- gfree(static_cast<void *>(lines));
-}
+TextSelectionDumper::~TextSelectionDumper() = default;
void TextSelectionDumper::startLine()
{
finishLine();
- words = new std::vector<TextWordSelection *>();
+ words.clear();
}
void TextSelectionDumper::finishLine()
{
- if (nLines == linesSize) {
- linesSize *= 2;
- lines = (std::vector<TextWordSelection *> **)grealloc(static_cast<void *>(lines), linesSize * sizeof(std::vector<TextWordSelection *> *));
- }
-
- if (words && !words->empty()) {
+ if (!words.empty()) {
// Reverse word order for RTL text. Fixes #53 for glib backend (Evince)
if (!page->primaryLR) {
- std::ranges::reverse(*words);
+ std::ranges::reverse(words);
}
- lines[nLines++] = words;
- } else {
- delete words;
+ lines.push_back(std::move(words));
}
- words = nullptr;
+ words.clear();
}
void TextSelectionDumper::visitLine(TextLine *line, TextWord *begin, TextWord *end, int edge_begin, int edge_end, const PDFRectangle *selection)
@@ -4639,7 +4616,7 @@ void TextSelectionDumper::visitLine(TextLine *line, TextWord *begin, TextWord *e
void TextSelectionDumper::visitWord(TextWord *word, int begin, int end, const PDFRectangle *selection)
{
- words->push_back(new TextWordSelection(word, begin, end));
+ words.push_back(std::make_unique<TextWordSelection>(word, begin, end));
}
void TextSelectionDumper::endPage()
@@ -4647,16 +4624,13 @@ void TextSelectionDumper::endPage()
finishLine();
}
-GooString *TextSelectionDumper::getText()
+GooString TextSelectionDumper::getText()
{
- GooString *text;
- int i;
+ GooString text;
const UnicodeMap *uMap;
char space[8], eol[16];
int spaceLen, eolLen;
- text = new GooString();
-
if (!(uMap = globalParams->getTextEncoding())) {
return text;
}
@@ -4665,39 +4639,31 @@ GooString *TextSelectionDumper::getText()
eolLen = uMap->mapUnicode(0x0a, eol, sizeof(eol));
std::vector<Unicode> uText;
- for (i = 0; i < nLines; i++) {
- std::vector<TextWordSelection *> *lineWords = lines[i];
- for (std::size_t j = 0; j < lineWords->size(); j++) {
- TextWordSelection *sel = (*lineWords)[j];
+ for (size_t i = 0; i < lines.size(); i++) {
+ const auto &line = lines[i];
+ for (size_t j = 0; j < line.size(); j++) {
+ const auto &sel = line[j];
uText.resize(sel->end - sel->begin);
std::transform(sel->word->chars.begin() + sel->begin, sel->word->chars.begin() + sel->end, uText.begin(), [](auto &c) { return c.text; });
- page->dumpFragment(uText.data(), uText.size(), uMap, text);
+ page->dumpFragment(uText.data(), uText.size(), uMap, &text);
- if (j < lineWords->size() - 1 && sel->word->spaceAfter) {
- text->append(space, spaceLen);
+ if (j < line.size() - 1 && sel->word->spaceAfter) {
+ text.append(space, spaceLen);
}
}
- if (i < nLines - 1) {
- text->append(eol, eolLen);
+ if (i < lines.size() - 1) {
+ text.append(eol, eolLen);
}
}
return text;
}
-std::vector<TextWordSelection *> **TextSelectionDumper::takeWordList(int *nLinesOut)
+std::vector<std::vector<std::unique_ptr<TextWordSelection>>> TextSelectionDumper::takeWordList()
{
- std::vector<TextWordSelection *> **returnValue = lines;
-
- *nLinesOut = nLines;
- if (nLines == 0) {
- return nullptr;
- }
-
- nLines = 0;
- lines = nullptr;
-
+ std::vector<std::vector<std::unique_ptr<TextWordSelection>>> returnValue;
+ std::swap(lines, returnValue);
return returnValue;
}
@@ -5338,7 +5304,7 @@ std::vector<PDFRectangle *> *TextPage::getSelectionRegion(const PDFRectangle *se
return sizer.takeRegion();
}
-GooString *TextPage::getSelectionText(const PDFRectangle *selection, SelectionStyle style)
+GooString TextPage::getSelectionText(const PDFRectangle *selection, SelectionStyle style)
{
TextSelectionDumper dumper(this);
@@ -5348,14 +5314,14 @@ GooString *TextPage::getSelectionText(const PDFRectangle *selection, SelectionSt
return dumper.getText();
}
-std::vector<TextWordSelection *> **TextPage::getSelectionWords(const PDFRectangle *selection, SelectionStyle style, int *nLines)
+std::vector<std::vector<std::unique_ptr<TextWordSelection>>> TextPage::getSelectionWords(const PDFRectangle *selection, SelectionStyle style)
{
TextSelectionDumper dumper(this);
visitSelection(&dumper, selection, style);
dumper.endPage();
- return dumper.takeWordList(nLines);
+ return dumper.takeWordList();
}
bool TextPage::findCharRange(int pos, int length, double *xMin, double *yMin, double *xMax, double *yMax) const
@@ -6060,7 +6026,7 @@ bool TextOutputDev::findText(const Unicode *s, int len, bool startAtTop, bool st
return text->findText(s, len, startAtTop, stopAtBottom, startAtLast, stopAtLast, caseSensitive, backward, wholeWord, xMin, yMin, xMax, yMax);
}
-GooString *TextOutputDev::getText(double xMin, double yMin, double xMax, double yMax) const
+GooString TextOutputDev::getText(double xMin, double yMin, double xMax, double yMax) const
{
return text->getText(xMin, yMin, xMax, yMax, textEOL);
}
@@ -6075,7 +6041,7 @@ std::vector<PDFRectangle *> *TextOutputDev::getSelectionRegion(const PDFRectangl
return text->getSelectionRegion(selection, style, scale);
}
-GooString *TextOutputDev::getSelectionText(const PDFRectangle *selection, SelectionStyle style)
+GooString TextOutputDev::getSelectionText(const PDFRectangle *selection, SelectionStyle style)
{
return text->getSelectionText(selection, style);
}
diff --git a/poppler/TextOutputDev.h b/poppler/TextOutputDev.h
index 20a71ae8..4ca5630e 100644
--- a/poppler/TextOutputDev.h
+++ b/poppler/TextOutputDev.h
@@ -637,7 +637,7 @@ public:
double *xMax, double *yMax, PDFRectangle *continueMatch, bool *ignoredHyphen);
// Get the text which is inside the specified rectangle.
- GooString *getText(double xMin, double yMin, double xMax, double yMax, EndOfLineKind textEOL) const;
+ GooString getText(double xMin, double yMin, double xMax, double yMax, EndOfLineKind textEOL) const;
void visitSelection(TextSelectionVisitor *visitor, const PDFRectangle *selection, SelectionStyle style);
@@ -645,9 +645,9 @@ public:
std::vector<PDFRectangle *> *getSelectionRegion(const PDFRectangle *selection, SelectionStyle style, double scale);
- GooString *getSelectionText(const PDFRectangle *selection, SelectionStyle style);
+ GooString getSelectionText(const PDFRectangle *selection, SelectionStyle style);
- std::vector<TextWordSelection *> **getSelectionWords(const PDFRectangle *selection, SelectionStyle style, int *nLines);
+ [[nodiscard]] std::vector<std::vector<std::unique_ptr<TextWordSelection>>> getSelectionWords(const PDFRectangle *selection, SelectionStyle style);
// Find a string by character position and length. If found, sets
// the text bounding rectangle and returns true; otherwise returns
@@ -850,7 +850,7 @@ public:
bool findText(const Unicode *s, int len, bool startAtTop, bool stopAtBottom, bool startAtLast, bool stopAtLast, bool caseSensitive, bool backward, bool wholeWord, double *xMin, double *yMin, double *xMax, double *yMax) const;
// Get the text which is inside the specified rectangle.
- GooString *getText(double xMin, double yMin, double xMax, double yMax) const;
+ GooString getText(double xMin, double yMin, double xMax, double yMax) const;
// Find a string by character position and length. If found, sets
// the text bounding rectangle and returns true; otherwise returns
@@ -861,7 +861,7 @@ public:
std::vector<PDFRectangle *> *getSelectionRegion(const PDFRectangle *selection, SelectionStyle style, double scale);
- GooString *getSelectionText(const PDFRectangle *selection, SelectionStyle style);
+ GooString getSelectionText(const PDFRectangle *selection, SelectionStyle style);
// If true, will combine characters when a base and combining
// character are drawn on eachother.
diff --git a/qt5/src/poppler-page.cc b/qt5/src/poppler-page.cc
index 96ff2875..08eb7f27 100644
--- a/qt5/src/poppler-page.cc
+++ b/qt5/src/poppler-page.cc
@@ -678,7 +678,7 @@ QImage Page::thumbnail() const
QString Page::text(const QRectF &r, TextLayout textLayout) const
{
TextOutputDev *output_dev;
- GooString *s;
+ GooString s;
QString result;
const bool rawOrder = textLayout == RawOrderLayout;
@@ -695,10 +695,9 @@ QString Page::text(const QRectF &r, TextLayout textLayout) const
s = output_dev->getText(r.left(), r.top(), r.right(), r.bottom());
}
- result = QString::fromStdString(s->toStr());
+ result = QString::fromStdString(s.toStr());
delete output_dev;
- delete s;
return result;
}
diff --git a/qt6/src/poppler-page.cc b/qt6/src/poppler-page.cc
index 8b99894b..8c88142b 100644
--- a/qt6/src/poppler-page.cc
+++ b/qt6/src/poppler-page.cc
@@ -678,7 +678,7 @@ QImage Page::thumbnail() const
QString Page::text(const QRectF &r, TextLayout textLayout) const
{
TextOutputDev *output_dev;
- GooString *s;
+ GooString s;
QString result;
const bool rawOrder = textLayout == RawOrderLayout;
@@ -695,10 +695,9 @@ QString Page::text(const QRectF &r, TextLayout textLayout) const
s = output_dev->getText(r.left(), r.top(), r.right(), r.bottom());
}
- result = QString::fromStdString(s->toStr());
+ result = QString::fromStdString(s.toStr());
delete output_dev;
- delete s;
return result;
}
diff --git a/test/perf-test.cc b/test/perf-test.cc
index 6526a04b..5f4b71b9 100644
--- a/test/perf-test.cc
+++ b/test/perf-test.cc
@@ -608,7 +608,6 @@ static bool ShowPreview()
static void RenderPdfAsText(const char *fileName)
{
PDFDoc *pdfDoc = nullptr;
- GooString *txt = nullptr;
int pageCount;
double timeInMs;
@@ -650,15 +649,13 @@ static void RenderPdfAsText(const char *fileName)
bool crop = true;
bool doLinks = false;
pdfDoc->displayPage(textOut, curPage, 72, 72, rotate, useMediaBox, crop, doLinks);
- txt = textOut->getText(0.0, 0.0, 10000.0, 10000.0);
+ GooString txt = textOut->getText(0.0, 0.0, 10000.0, 10000.0);
msTimer.stop();
timeInMs = msTimer.getElapsed();
if (gfTimings) {
LogInfo("page %d: %.2f ms\n", curPage, timeInMs);
}
- printf("%s\n", txt->c_str());
- delete txt;
- txt = nullptr;
+ printf("%s\n", txt.c_str());
}
Exit: