summaryrefslogtreecommitdiff
blob: 45437356cd7a5b05d3cb2ca3005fd2a916e9a6d2 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
/*====================================================================*
 -  Copyright (C) 2001 Leptonica.  All rights reserved.
 -
 -  Redistribution and use in source and binary forms, with or without
 -  modification, are permitted provided that the following conditions
 -  are met:
 -  1. Redistributions of source code must retain the above copyright
 -     notice, this list of conditions and the following disclaimer.
 -  2. Redistributions in binary form must reproduce the above
 -     copyright notice, this list of conditions and the following
 -     disclaimer in the documentation and/or other materials
 -     provided with the distribution.
 -
 -  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 -  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 -  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 -  A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL ANY
 -  CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 -  EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 -  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 -  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 -  OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 -  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 -  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *====================================================================*/

/*
 * comparepages.c
 *
 *    This compares text pages using the location of word bounding boxes.
 *    The goal is to get a fast and robust determination for whether
 *    two pages are the same.
 */

#ifdef HAVE_CONFIG_H
#include <config_auto.h>
#endif  /* HAVE_CONFIG_H */

#include "allheaders.h"

int main(int    argc,
         char **argv)
{
l_int32  w, h, n, same;
BOXA    *boxa1, *boxa2;
NUMA    *nai1, *nai2;
NUMAA   *naa1, *naa2;
PIX     *pixs, *pixt, *pixb1, *pixb2;

    setLeptDebugOK(1);
    lept_mkdir("lept/comp");

    pixs = pixRead("lucasta.047.jpg");
    pixb1 = pixConvertTo1(pixs, 128);
    pixGetWordBoxesInTextlines(pixb1, 10, 10, 500, 50, &boxa1, &nai1);
    pixt = pixDrawBoxaRandom(pixs, boxa1, 2);
    pixDisplay(pixt, 100, 100);
    pixWrite("/tmp/lept/comp/pixt.png", pixt, IFF_PNG);
    naa1 = boxaExtractSortedPattern(boxa1, nai1);
    numaaWrite("/tmp/lept/comp/naa1.naa", naa1);
    n = numaaGetCount(naa1);
    lept_stderr("Number of textlines = %d\n", n);
    pixDisplay(pixb1, 300, 0);

        /* Translate */
    pixb2 = pixCreateTemplate(pixb1);
    pixGetDimensions(pixb1, &w, &h, NULL);
    pixRasterop(pixb2, 148, 133, w, h, PIX_SRC, pixb1, 0, 0);
    pixDisplay(pixb2, 600, 0);
    pixGetWordBoxesInTextlines(pixb2, 10, 10, 500, 50, &boxa2, &nai2);
    naa2 = boxaExtractSortedPattern(boxa2, nai2);
    numaaCompareImagesByBoxes(naa1, naa2, 5, 10, 150, 150, 20, 20, &same, 1);
    lept_stderr("Translation.  same?: %d\n\n", same);
    boxaDestroy(&boxa2);
    numaDestroy(&nai2);
    pixDestroy(&pixb2);
    numaaDestroy(&naa2);

        /* Aligned part is below h/3 */
    pixb2 = pixCreateTemplate(pixb1);
    pixGetDimensions(pixb1, &w, &h, NULL);
    pixRasterop(pixb2, 0, 0, w, h / 3, PIX_SRC, pixb1, 0, 2 * h / 3);
    pixRasterop(pixb2, 0, h / 3, w, 2 * h / 3, PIX_SRC, pixb1, 0, h / 3);
    pixDisplay(pixb2, 900, 0);
    pixGetWordBoxesInTextlines(pixb2, 10, 10, 500, 50, &boxa2, &nai2);
    naa2 = boxaExtractSortedPattern(boxa2, nai2);
    numaaCompareImagesByBoxes(naa1, naa2, 5, 10, 150, 150, 20, 20, &same, 1);
    lept_stderr("Aligned part below h/3.  same?: %d\n\n", same);
    boxaDestroy(&boxa2);
    numaDestroy(&nai2);
    pixDestroy(&pixb2);
    numaaDestroy(&naa2);

        /* Top and bottom switched; no aligned parts */
    pixb2 = pixCreateTemplate(pixb1);
    pixGetDimensions(pixb1, &w, &h, NULL);
    pixRasterop(pixb2, 0, 0, w, h / 3, PIX_SRC, pixb1, 0, 2 * h / 3);
    pixRasterop(pixb2, 0, h / 3, w, 2 * h / 3, PIX_SRC, pixb1, 0, 0);
    pixDisplay(pixb2, 1200, 0);
    pixGetWordBoxesInTextlines(pixb2, 10, 10, 500, 50, &boxa2, &nai2);
    naa2 = boxaExtractSortedPattern(boxa2, nai2);
    numaaCompareImagesByBoxes(naa1, naa2, 5, 10, 150, 150, 20, 20, &same, 1);
    lept_stderr("Top/Bot switched; no alignment.  Same?: %d\n", same);
    boxaDestroy(&boxa2);
    numaDestroy(&nai2);
    pixDestroy(&pixb2);
    numaaDestroy(&naa2);

    boxaDestroy(&boxa1);
    numaDestroy(&nai1);
    pixDestroy(&pixs);
    pixDestroy(&pixb1);
    pixDestroy(&pixt);
    numaaDestroy(&naa1);
    return 0;
}