summaryrefslogtreecommitdiff
path: root/glib/tests/check_text.c
blob: 9ab99ba68f5516d826c1aa9936eaa8d66d394204 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
/*
 * testing program for the poppler_page_get_text() function and its
 * consistency with poppler_page_get_text_layout()
 */

#include <stdlib.h>
#include <stdio.h>
#include <string.h>

#include <poppler.h>

/*
 * open_test_document
 *
 * Opens the PDF located at `path` and returns the loaded document.
 * The GFile created for the path is stored in `*file_out`; the caller
 * releases both the GFile and the document (e.g. with g_clear_object()).
 * On failure a diagnostic is printed to stderr and the test program exits
 * with EXIT_FAILURE, so the function only returns on success.
 */
static PopplerDocument *open_test_document(const char *path, GFile **file_out)
{
    GError *err = NULL;
    GFile *file;
    PopplerDocument *doc;

    file = g_file_new_for_path(path);
    if (!file) {
        exit(EXIT_FAILURE);
    }

    doc = poppler_document_new_from_gfile(file, NULL, NULL, &err);
    if (doc == NULL) {
        /* Guard the dereference: do not rely on err having been filled in. */
        g_printerr("error opening pdf file: %s\n", err ? err->message : "unknown error");
        g_clear_error(&err);
        exit(EXIT_FAILURE);
    }

    *file_out = file;
    return doc;
}

/*
 * main
 *
 * Runs two checks and returns EXIT_SUCCESS when both pass:
 *   1. the text of the first page of WithActualText.pdf equals the
 *      expected sentence;
 *   2. for the first page of searchAcrossLines.pdf, the number of glyph
 *      areas reported by poppler_page_get_text_layout() equals the number
 *      of UTF-8 characters returned by poppler_page_get_text() (Issue #1100).
 * Any setup failure terminates the program with EXIT_FAILURE.
 */
int main(int argc, char *argv[])
{
    GFile *infile = NULL;
    PopplerDocument *doc;
    PopplerPage *page;
    PopplerRectangle *areas = NULL;
    guint n_glyph_areas = 0, n_utf8_chars;
    int npages;
    char *text;

    (void)argc;
    (void)argv;

    /* open file */

    doc = open_test_document(TESTDATADIR "/unittestcases/WithActualText.pdf", &infile);

    /* pages */

    npages = poppler_document_get_n_pages(doc);
    if (npages < 1) {
        g_printerr("no page in document\n");
        exit(EXIT_FAILURE);
    }

    /* check text */

    page = poppler_document_get_page(doc, 0);
    if (page == NULL || !POPPLER_IS_PAGE(page)) {
        g_printerr("error opening pdf page\n");
        exit(EXIT_FAILURE);
    }

    text = poppler_page_get_text(page);
    if (text == NULL) {
        /* Fail explicitly instead of handing NULL to a "%s" conversion. */
        g_printerr("error getting text from pdf page\n");
        exit(EXIT_FAILURE);
    }
    g_print("%s\n", text);
    g_assert_cmpstr(text, ==, "The slow brown fox jumps over the black dog.");

    /* Cleanup vars for next test */
    g_clear_object(&page);
    g_clear_object(&doc);
    g_clear_object(&infile);
    g_clear_pointer(&text, g_free);

    /* Test for consistency between utf8 characters returned by poppler_page_get_text()
     * and glyph layout areas returned by poppler_page_get_text_layout(). Issue #1100 */
    g_print("Consistency test between poppler_page_get_text() and poppler_page_get_text_layout()\n");
    g_print("Issue #1100 \n");

    doc = open_test_document(TESTDATADIR "/unittestcases/searchAcrossLines.pdf", &infile);

    page = poppler_document_get_page(doc, 0);
    if (page == NULL || !POPPLER_IS_PAGE(page)) {
        g_printerr("error opening pdf page\n");
        exit(EXIT_FAILURE);
    }

    text = poppler_page_get_text(page);
    if (text == NULL) {
        g_printerr("error getting text from pdf page\n");
        exit(EXIT_FAILURE);
    }
    n_utf8_chars = (guint)g_utf8_strlen(text, -1);

    /* If the layout call fails, make sure the comparison below sees
     * "no glyph areas" rather than whatever was left in the out-params. */
    if (!poppler_page_get_text_layout(page, &areas, &n_glyph_areas)) {
        areas = NULL;
        n_glyph_areas = 0;
    }
    g_assert_cmpuint(n_glyph_areas, ==, n_utf8_chars);
    g_print("Test: OK ('layout glyph areas' match amount of 'utf8 characters')\n");

    /* Cleanup vars for next test */
    g_clear_object(&page);
    g_clear_object(&doc);
    g_clear_object(&infile);
    g_clear_pointer(&areas, g_free);
    g_clear_pointer(&text, g_free);

    return EXIT_SUCCESS;
}