diff options
| field | value | date |
|---|---|---|
| author | Sune Vuorela <sune@vuorela.dk> | 2024-12-19 13:34:01 +0100 |
| committer | Albert Astals Cid <aacid@kde.org> | 2024-12-20 17:02:06 +0000 |
| commit | d2609165be8815cfa613f71df7369c59a41f7a18 (patch) | |
| tree | e4104b5c2bc46babad3a92e1f115948ca32642f2 | |
| parent | a3fd179de9002c26da5cac62ca728894808e6513 (diff) | |
TextOutputDev: memory cleanups
Return some GooStrings by value, and convert a std::vector** to a vector
of vectors
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | glib/poppler-page.cc | 73 |
| -rw-r--r-- | poppler/TextOutputDev.cc | 108 |
| -rw-r--r-- | poppler/TextOutputDev.h | 10 |
| -rw-r--r-- | qt5/src/poppler-page.cc | 5 |
| -rw-r--r-- | qt6/src/poppler-page.cc | 5 |
| -rw-r--r-- | test/perf-test.cc | 7 |
6 files changed, 74 insertions, 134 deletions
diff --git a/glib/poppler-page.cc b/glib/poppler-page.cc index 38e5e680..29871887 100644 --- a/glib/poppler-page.cc +++ b/glib/poppler-page.cc @@ -726,7 +726,6 @@ cairo_region_t *poppler_page_get_selected_region(PopplerPage *page, gdouble scal **/ char *poppler_page_get_selected_text(PopplerPage *page, PopplerSelectionStyle style, PopplerRectangle *selection) { - GooString *sel_text; char *result; TextPage *text; SelectionStyle selection_style = selectionStyleGlyph; @@ -753,9 +752,8 @@ char *poppler_page_get_selected_text(PopplerPage *page, PopplerSelectionStyle st } text = poppler_page_get_text_page(page); - sel_text = text->getSelectionText(&pdf_selection, selection_style); - result = g_strdup(sel_text->c_str()); - delete sel_text; + GooString sel_text = text->getSelectionText(&pdf_selection, selection_style); + result = g_strdup(sel_text.c_str()); return result; } @@ -2327,12 +2325,10 @@ gboolean poppler_page_get_text_layout_for_area(PopplerPage *page, PopplerRectang TextPage *text; PopplerRectangle *rect; PDFRectangle selection; - int i, k; guint offset = 0; guint n_rects = 0; gdouble x1, y1, x2, y2; gdouble x3, y3, x4, y4; - int n_lines; g_return_val_if_fail(POPPLER_IS_PAGE(page), FALSE); g_return_val_if_fail(area != nullptr, FALSE); @@ -2345,19 +2341,18 @@ gboolean poppler_page_get_text_layout_for_area(PopplerPage *page, PopplerRectang selection.y2 = area->y2; text = poppler_page_get_text_page(page); - std::vector<TextWordSelection *> **word_list = text->getSelectionWords(&selection, selectionStyleGlyph, &n_lines); - if (!word_list) { + std::vector<std::vector<std::unique_ptr<TextWordSelection>>> word_list = text->getSelectionWords(&selection, selectionStyleGlyph); + if (word_list.empty()) { return FALSE; } - n_rects += n_lines - 1; - for (i = 0; i < n_lines; i++) { - std::vector<TextWordSelection *> *line_words = word_list[i]; - n_rects += line_words->size() - 1; - for (std::size_t j = 0; j < line_words->size(); j++) { - const TextWordSelection *word_sel = 
(*line_words)[j]; + n_rects += word_list.size() - 1; + for (const std::vector<std::unique_ptr<TextWordSelection>> &line_words : word_list) { + n_rects += line_words.size() - 1; + for (std::size_t j = 0; j < line_words.size(); j++) { + const TextWordSelection *word_sel = line_words[j].get(); n_rects += word_sel->getEnd() - word_sel->getBegin(); - if (!word_sel->getWord()->hasSpaceAfter() && j < line_words->size() - 1) { + if (!word_sel->getWord()->hasSpaceAfter() && j < line_words.size() - 1) { n_rects--; } } @@ -2366,14 +2361,14 @@ gboolean poppler_page_get_text_layout_for_area(PopplerPage *page, PopplerRectang *rectangles = g_new(PopplerRectangle, n_rects); *n_rectangles = n_rects; - for (i = 0; i < n_lines; i++) { - std::vector<TextWordSelection *> *line_words = word_list[i]; - for (std::size_t j = 0; j < line_words->size(); j++) { - TextWordSelection *word_sel = (*line_words)[j]; + for (size_t i = 0; i < word_list.size(); i++) { + std::vector<std::unique_ptr<TextWordSelection>> &line_words = word_list[i]; + for (std::size_t j = 0; j < line_words.size(); j++) { + TextWordSelection *word_sel = line_words[j].get(); const TextWord *word = word_sel->getWord(); int end = word_sel->getEnd(); - for (k = word_sel->getBegin(); k < end; k++) { + for (int k = word_sel->getBegin(); k < end; k++) { rect = *rectangles + offset; word->getCharBBox(k, &(rect->x1), &(rect->y1), &(rect->x2), &(rect->y2)); offset++; @@ -2382,8 +2377,8 @@ gboolean poppler_page_get_text_layout_for_area(PopplerPage *page, PopplerRectang rect = *rectangles + offset; word->getBBox(&x1, &y1, &x2, &y2); - if (word->hasSpaceAfter() && j < line_words->size() - 1) { - TextWordSelection *next_word_sel = (*line_words)[j + 1]; + if (word->hasSpaceAfter() && j < line_words.size() - 1) { + TextWordSelection *next_word_sel = line_words[j + 1].get(); next_word_sel->getWord()->getBBox(&x3, &y3, &x4, &y4); // space is from one word to other and with the same height as @@ -2394,11 +2389,9 @@ gboolean 
poppler_page_get_text_layout_for_area(PopplerPage *page, PopplerRectang rect->y2 = y2; offset++; } - - delete word_sel; } - if (i < n_lines - 1 && offset > 0) { + if (i < word_list.size() - 1 && offset > 0) { // end of line rect->x1 = x2; rect->y1 = y2; @@ -2406,12 +2399,7 @@ gboolean poppler_page_get_text_layout_for_area(PopplerPage *page, PopplerRectang rect->y2 = y2; offset++; } - - delete line_words; } - - gfree(static_cast<void *>(word_list)); - return TRUE; } @@ -2503,11 +2491,9 @@ GList *poppler_page_get_text_attributes_for_area(PopplerPage *page, PopplerRecta { TextPage *text; PDFRectangle selection; - int n_lines; PopplerTextAttributes *attrs = nullptr; const TextWord *word, *prev_word = nullptr; gint word_i, prev_word_i; - gint i; gint offset = 0; GList *attributes = nullptr; @@ -2520,15 +2506,15 @@ GList *poppler_page_get_text_attributes_for_area(PopplerPage *page, PopplerRecta selection.y2 = area->y2; text = poppler_page_get_text_page(page); - std::vector<TextWordSelection *> **word_list = text->getSelectionWords(&selection, selectionStyleGlyph, &n_lines); - if (!word_list) { + std::vector<std::vector<std::unique_ptr<TextWordSelection>>> word_list = text->getSelectionWords(&selection, selectionStyleGlyph); + if (word_list.empty()) { return nullptr; } - for (i = 0; i < n_lines; i++) { - std::vector<TextWordSelection *> *line_words = word_list[i]; - for (std::size_t j = 0; j < line_words->size(); j++) { - TextWordSelection *word_sel = (*line_words)[j]; + for (size_t i = 0; i < word_list.size(); i++) { + std::vector<std::unique_ptr<TextWordSelection>> &line_words = word_list[i]; + for (std::size_t j = 0; j < line_words.size(); j++) { + TextWordSelection *word_sel = line_words[j].get(); int end = word_sel->getEnd(); word = word_sel->getWord(); @@ -2545,23 +2531,16 @@ GList *poppler_page_get_text_attributes_for_area(PopplerPage *page, PopplerRecta prev_word_i = word_i; } - if (word->hasSpaceAfter() && j < line_words->size() - 1) { + if (word->hasSpaceAfter() 
&& j < line_words.size() - 1) { attrs->end_index = offset; offset++; } - - delete word_sel; } - if (i < n_lines - 1) { + if (i < word_list.size() - 1) { attrs->end_index = offset; offset++; } - - delete line_words; } - - gfree(static_cast<void *>(word_list)); - return g_list_reverse(attributes); } diff --git a/poppler/TextOutputDev.cc b/poppler/TextOutputDev.cc index 7a817ee8..1a53b5e2 100644 --- a/poppler/TextOutputDev.cc +++ b/poppler/TextOutputDev.cc @@ -4284,9 +4284,8 @@ bool TextPage::findText(const Unicode *s, int len, bool startAtTop, bool stopAtB return false; } -GooString *TextPage::getText(double xMin, double yMin, double xMax, double yMax, EndOfLineKind textEOL) const +GooString TextPage::getText(double xMin, double yMin, double xMax, double yMax, EndOfLineKind textEOL) const { - GooString *s; const UnicodeMap *uMap; TextBlock *blk; TextLine *line; @@ -4300,7 +4299,7 @@ GooString *TextPage::getText(double xMin, double yMin, double xMax, double yMax, int col, idx0, idx1, i, j; bool multiLine, oneRot; - s = new GooString(); + GooString s; // get the output encoding if (!(uMap = globalParams->getTextEncoding())) { @@ -4318,7 +4317,7 @@ GooString *TextPage::getText(double xMin, double yMin, double xMax, double yMax, word->getCharBBox(j, &gXMin, &gYMin, &gXMax, &gYMax); if (xMin <= gXMin && gXMax <= xMax && yMin <= gYMin && gYMax <= yMax) { mbc_len = uMap->mapUnicode(*(word->getChar(j)), mbc, sizeof(mbc)); - s->append(mbc, mbc_len); + s.append(mbc, mbc_len); } } } @@ -4489,22 +4488,22 @@ GooString *TextPage::getText(double xMin, double yMin, double xMax, double yMax, // insert a return if (frag->col < col || (i > 0 && fabs(frag->base - frags[i - 1].base) > maxIntraLineDelta * frags[i - 1].line->words->fontSize)) { - s->append(eol, eolLen); + s.append(eol, eolLen); col = 0; multiLine = true; } // column alignment for (; col < frag->col; ++col) { - s->append(space, spaceLen); + s.append(space, spaceLen); } // get the fragment text - col += 
dumpFragment(frag->line->text + frag->start, frag->len, uMap, s); + col += dumpFragment(frag->line->text + frag->start, frag->len, uMap, &s); } if (multiLine) { - s->append(eol, eolLen); + s.append(eol, eolLen); } } @@ -4543,66 +4542,44 @@ public: void visitWord(TextWord *word, int begin, int end, const PDFRectangle *selection) override; void endPage(); - GooString *getText(); - std::vector<TextWordSelection *> **takeWordList(int *nLines); + GooString getText(); + std::vector<std::vector<std::unique_ptr<TextWordSelection>>> takeWordList(); private: void startLine(); void finishLine(); - std::vector<TextWordSelection *> **lines; - int nLines, linesSize; - std::vector<TextWordSelection *> *words; + std::vector<std::vector<std::unique_ptr<TextWordSelection>>> lines; + std::vector<std::unique_ptr<TextWordSelection>> words; int tableId; TextBlock *currentBlock; }; TextSelectionDumper::TextSelectionDumper(TextPage *p) : TextSelectionVisitor(p) { - linesSize = 256; - lines = (std::vector<TextWordSelection *> **)gmallocn(linesSize, sizeof(std::vector<TextWordSelection *> *)); - nLines = 0; - tableId = -1; currentBlock = nullptr; - words = nullptr; } -TextSelectionDumper::~TextSelectionDumper() -{ - for (int i = 0; i < nLines; i++) { - for (auto entry : *(lines[i])) { - delete entry; - } - delete lines[i]; - } - gfree(static_cast<void *>(lines)); -} +TextSelectionDumper::~TextSelectionDumper() = default; void TextSelectionDumper::startLine() { finishLine(); - words = new std::vector<TextWordSelection *>(); + words.clear(); } void TextSelectionDumper::finishLine() { - if (nLines == linesSize) { - linesSize *= 2; - lines = (std::vector<TextWordSelection *> **)grealloc(static_cast<void *>(lines), linesSize * sizeof(std::vector<TextWordSelection *> *)); - } - - if (words && !words->empty()) { + if (!words.empty()) { // Reverse word order for RTL text. 
Fixes #53 for glib backend (Evince) if (!page->primaryLR) { - std::ranges::reverse(*words); + std::ranges::reverse(words); } - lines[nLines++] = words; - } else { - delete words; + lines.push_back(std::move(words)); } - words = nullptr; + words.clear(); } void TextSelectionDumper::visitLine(TextLine *line, TextWord *begin, TextWord *end, int edge_begin, int edge_end, const PDFRectangle *selection) @@ -4639,7 +4616,7 @@ void TextSelectionDumper::visitLine(TextLine *line, TextWord *begin, TextWord *e void TextSelectionDumper::visitWord(TextWord *word, int begin, int end, const PDFRectangle *selection) { - words->push_back(new TextWordSelection(word, begin, end)); + words.push_back(std::make_unique<TextWordSelection>(word, begin, end)); } void TextSelectionDumper::endPage() @@ -4647,16 +4624,13 @@ void TextSelectionDumper::endPage() finishLine(); } -GooString *TextSelectionDumper::getText() +GooString TextSelectionDumper::getText() { - GooString *text; - int i; + GooString text; const UnicodeMap *uMap; char space[8], eol[16]; int spaceLen, eolLen; - text = new GooString(); - if (!(uMap = globalParams->getTextEncoding())) { return text; } @@ -4665,39 +4639,31 @@ GooString *TextSelectionDumper::getText() eolLen = uMap->mapUnicode(0x0a, eol, sizeof(eol)); std::vector<Unicode> uText; - for (i = 0; i < nLines; i++) { - std::vector<TextWordSelection *> *lineWords = lines[i]; - for (std::size_t j = 0; j < lineWords->size(); j++) { - TextWordSelection *sel = (*lineWords)[j]; + for (size_t i = 0; i < lines.size(); i++) { + const auto &line = lines[i]; + for (size_t j = 0; j < line.size(); j++) { + const auto &sel = line[j]; uText.resize(sel->end - sel->begin); std::transform(sel->word->chars.begin() + sel->begin, sel->word->chars.begin() + sel->end, uText.begin(), [](auto &c) { return c.text; }); - page->dumpFragment(uText.data(), uText.size(), uMap, text); + page->dumpFragment(uText.data(), uText.size(), uMap, &text); - if (j < lineWords->size() - 1 && sel->word->spaceAfter) 
{ - text->append(space, spaceLen); + if (j < line.size() - 1 && sel->word->spaceAfter) { + text.append(space, spaceLen); } } - if (i < nLines - 1) { - text->append(eol, eolLen); + if (i < lines.size() - 1) { + text.append(eol, eolLen); } } return text; } -std::vector<TextWordSelection *> **TextSelectionDumper::takeWordList(int *nLinesOut) +std::vector<std::vector<std::unique_ptr<TextWordSelection>>> TextSelectionDumper::takeWordList() { - std::vector<TextWordSelection *> **returnValue = lines; - - *nLinesOut = nLines; - if (nLines == 0) { - return nullptr; - } - - nLines = 0; - lines = nullptr; - + std::vector<std::vector<std::unique_ptr<TextWordSelection>>> returnValue; + std::swap(lines, returnValue); return returnValue; } @@ -5338,7 +5304,7 @@ std::vector<PDFRectangle *> *TextPage::getSelectionRegion(const PDFRectangle *se return sizer.takeRegion(); } -GooString *TextPage::getSelectionText(const PDFRectangle *selection, SelectionStyle style) +GooString TextPage::getSelectionText(const PDFRectangle *selection, SelectionStyle style) { TextSelectionDumper dumper(this); @@ -5348,14 +5314,14 @@ GooString *TextPage::getSelectionText(const PDFRectangle *selection, SelectionSt return dumper.getText(); } -std::vector<TextWordSelection *> **TextPage::getSelectionWords(const PDFRectangle *selection, SelectionStyle style, int *nLines) +std::vector<std::vector<std::unique_ptr<TextWordSelection>>> TextPage::getSelectionWords(const PDFRectangle *selection, SelectionStyle style) { TextSelectionDumper dumper(this); visitSelection(&dumper, selection, style); dumper.endPage(); - return dumper.takeWordList(nLines); + return dumper.takeWordList(); } bool TextPage::findCharRange(int pos, int length, double *xMin, double *yMin, double *xMax, double *yMax) const @@ -6060,7 +6026,7 @@ bool TextOutputDev::findText(const Unicode *s, int len, bool startAtTop, bool st return text->findText(s, len, startAtTop, stopAtBottom, startAtLast, stopAtLast, caseSensitive, backward, wholeWord, xMin, 
yMin, xMax, yMax); } -GooString *TextOutputDev::getText(double xMin, double yMin, double xMax, double yMax) const +GooString TextOutputDev::getText(double xMin, double yMin, double xMax, double yMax) const { return text->getText(xMin, yMin, xMax, yMax, textEOL); } @@ -6075,7 +6041,7 @@ std::vector<PDFRectangle *> *TextOutputDev::getSelectionRegion(const PDFRectangl return text->getSelectionRegion(selection, style, scale); } -GooString *TextOutputDev::getSelectionText(const PDFRectangle *selection, SelectionStyle style) +GooString TextOutputDev::getSelectionText(const PDFRectangle *selection, SelectionStyle style) { return text->getSelectionText(selection, style); } diff --git a/poppler/TextOutputDev.h b/poppler/TextOutputDev.h index 20a71ae8..4ca5630e 100644 --- a/poppler/TextOutputDev.h +++ b/poppler/TextOutputDev.h @@ -637,7 +637,7 @@ public: double *xMax, double *yMax, PDFRectangle *continueMatch, bool *ignoredHyphen); // Get the text which is inside the specified rectangle. - GooString *getText(double xMin, double yMin, double xMax, double yMax, EndOfLineKind textEOL) const; + GooString getText(double xMin, double yMin, double xMax, double yMax, EndOfLineKind textEOL) const; void visitSelection(TextSelectionVisitor *visitor, const PDFRectangle *selection, SelectionStyle style); @@ -645,9 +645,9 @@ public: std::vector<PDFRectangle *> *getSelectionRegion(const PDFRectangle *selection, SelectionStyle style, double scale); - GooString *getSelectionText(const PDFRectangle *selection, SelectionStyle style); + GooString getSelectionText(const PDFRectangle *selection, SelectionStyle style); - std::vector<TextWordSelection *> **getSelectionWords(const PDFRectangle *selection, SelectionStyle style, int *nLines); + [[nodiscard]] std::vector<std::vector<std::unique_ptr<TextWordSelection>>> getSelectionWords(const PDFRectangle *selection, SelectionStyle style); // Find a string by character position and length. 
If found, sets // the text bounding rectangle and returns true; otherwise returns @@ -850,7 +850,7 @@ public: bool findText(const Unicode *s, int len, bool startAtTop, bool stopAtBottom, bool startAtLast, bool stopAtLast, bool caseSensitive, bool backward, bool wholeWord, double *xMin, double *yMin, double *xMax, double *yMax) const; // Get the text which is inside the specified rectangle. - GooString *getText(double xMin, double yMin, double xMax, double yMax) const; + GooString getText(double xMin, double yMin, double xMax, double yMax) const; // Find a string by character position and length. If found, sets // the text bounding rectangle and returns true; otherwise returns @@ -861,7 +861,7 @@ public: std::vector<PDFRectangle *> *getSelectionRegion(const PDFRectangle *selection, SelectionStyle style, double scale); - GooString *getSelectionText(const PDFRectangle *selection, SelectionStyle style); + GooString getSelectionText(const PDFRectangle *selection, SelectionStyle style); // If true, will combine characters when a base and combining // character are drawn on eachother. 
diff --git a/qt5/src/poppler-page.cc b/qt5/src/poppler-page.cc index 96ff2875..08eb7f27 100644 --- a/qt5/src/poppler-page.cc +++ b/qt5/src/poppler-page.cc @@ -678,7 +678,7 @@ QImage Page::thumbnail() const QString Page::text(const QRectF &r, TextLayout textLayout) const { TextOutputDev *output_dev; - GooString *s; + GooString s; QString result; const bool rawOrder = textLayout == RawOrderLayout; @@ -695,10 +695,9 @@ QString Page::text(const QRectF &r, TextLayout textLayout) const s = output_dev->getText(r.left(), r.top(), r.right(), r.bottom()); } - result = QString::fromStdString(s->toStr()); + result = QString::fromStdString(s.toStr()); delete output_dev; - delete s; return result; } diff --git a/qt6/src/poppler-page.cc b/qt6/src/poppler-page.cc index 8b99894b..8c88142b 100644 --- a/qt6/src/poppler-page.cc +++ b/qt6/src/poppler-page.cc @@ -678,7 +678,7 @@ QImage Page::thumbnail() const QString Page::text(const QRectF &r, TextLayout textLayout) const { TextOutputDev *output_dev; - GooString *s; + GooString s; QString result; const bool rawOrder = textLayout == RawOrderLayout; @@ -695,10 +695,9 @@ QString Page::text(const QRectF &r, TextLayout textLayout) const s = output_dev->getText(r.left(), r.top(), r.right(), r.bottom()); } - result = QString::fromStdString(s->toStr()); + result = QString::fromStdString(s.toStr()); delete output_dev; - delete s; return result; } diff --git a/test/perf-test.cc b/test/perf-test.cc index 6526a04b..5f4b71b9 100644 --- a/test/perf-test.cc +++ b/test/perf-test.cc @@ -608,7 +608,6 @@ static bool ShowPreview() static void RenderPdfAsText(const char *fileName) { PDFDoc *pdfDoc = nullptr; - GooString *txt = nullptr; int pageCount; double timeInMs; @@ -650,15 +649,13 @@ static void RenderPdfAsText(const char *fileName) bool crop = true; bool doLinks = false; pdfDoc->displayPage(textOut, curPage, 72, 72, rotate, useMediaBox, crop, doLinks); - txt = textOut->getText(0.0, 0.0, 10000.0, 10000.0); + GooString txt = textOut->getText(0.0, 0.0, 
10000.0, 10000.0); msTimer.stop(); timeInMs = msTimer.getElapsed(); if (gfTimings) { LogInfo("page %d: %.2f ms\n", curPage, timeInMs); } - printf("%s\n", txt->c_str()); - delete txt; - txt = nullptr; + printf("%s\n", txt.c_str()); } Exit: |