summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--app-text/tesseract/Manifest36
-rw-r--r--app-text/tesseract/metadata.xml16
-rw-r--r--app-text/tesseract/tesseract-3.00.ebuild98
3 files changed, 150 insertions, 0 deletions
diff --git a/app-text/tesseract/Manifest b/app-text/tesseract/Manifest
new file mode 100644
index 0000000..10b3ad2
--- /dev/null
+++ b/app-text/tesseract/Manifest
@@ -0,0 +1,36 @@
+DIST bul.traineddata.gz 848731 RMD160 cbcdb7532af8c03abb5f8bcdb5b34516b6a522e4 SHA1 a9efae5e347a36ea90bd2ad357e732ad4da47fd3 SHA256 fc3c650568d014f2337052658e9ddd8efaff6ac74c9a90952d740eafdf400e2d
+DIST cat.traineddata.gz 995008 RMD160 3b1a5067e97e4c104ee766fc89cc47c261b55b38 SHA1 0301a9c81c1d646bd1b135ca89476fb63bd634f8 SHA256 dcc52db7522c67d2629117b7fde4c114ca51677f7f1efbedc12bf1e7319acde2
+DIST ces.traineddata.gz 1059966 RMD160 356bc1982be43cb89e1fa62b1af53a0f28d14444 SHA1 dbec19aa23f42a08e6b195a96e64b443f7519620 SHA256 9848a4c137be45659e29f4aa07ccb6357fd4401307478cce0ed5f8e6686480d2
+DIST chi_sim.traineddata.gz 19732398 RMD160 7a3e5f0f33f9869c10f793daee4883a5167eb178 SHA1 35f0254f159edeed509ec1e0779073bf998b6cdb SHA256 40c1cdf62106a0705807e8be5bee235a9e9efe716e22568e3885561a835d7d5b
+DIST chi_tra.traineddata.gz 27512772 RMD160 590262a166a86f55177dde0dd0b10926a91c1ef4 SHA1 a9798de7e068d85613602aa33a153da721aadc82 SHA256 68ac2d83998b7f6e3fa26a58be8be5507e78185a16b3e106fe48a7b2667165fc
+DIST dan.traineddata.gz 958449 RMD160 f354451e42486609ff631b5a3ba52134cc0a706f SHA1 bfac9c00d28fc4b19034c2098d41087a173084ae SHA256 2104d8b1cfde6fa960e31db826a101a356c77c3efeae6e78cab5a97e62f3410c
+DIST deu.traineddata.gz 965684 RMD160 f5e26562dfb3259c0c6fca0c9d1a23589f6af981 SHA1 c4b3ecde18ce9f114faba88cdfd0308f90801266 SHA256 9e1845a69d5b6cf93d4fa05d5b8934e7cfaf8b088e6be9e8dac30c7859591ccc
+DIST ell.traineddata.gz 944284 RMD160 791acfb5a60875d02e23ebc8c66243adedb5cf39 SHA1 b7a449fc320cc579a729c0352e5cc642f565e64e SHA256 f8cb765c40733a677fa470370b935c34cfc53ba8de372b33e83ab59c7407195d
+DIST eng.traineddata.gz 742852 RMD160 d35ef8383c58faa6765e2eceb4da11fc688f488b SHA1 bf72c6c4542d67ce1d6def9d4e8432c7e98a654b SHA256 3ad150c58d59aa7f20e12bed0e15a33c988f5803d6cba8ef366aeec0c18d4e9d
+DIST fin.traineddata.gz 959833 RMD160 a467774acf081aff2bd7fc5e1557eb1486336b6f SHA1 004d74d13f7b53cbefb86e2ba12bc67dce81d936 SHA256 30c05d403295fb659048b5d82db0d87c9cbee5077314e2b52f6fdca9c9ec8520
+DIST fra.traineddata.gz 933372 RMD160 97ab2fb064da5d7d15032fd1c5e87aa2f14dc1b3 SHA1 8d698bb3b659e862b3274970a57b3214de76f1ff SHA256 1d795f1da1ee20671ffbbf4b2997b424f459861b217c9ffa7abbeb72abd760d6
+DIST hun.traineddata.gz 1008061 RMD160 7acc341ed55ca61869c7876b8b37ac2a143abd5b SHA1 32ecad03877a841fbc0cb31c269214640008d604 SHA256 7b4d80703067fc5a8bf3da80f7da699f26a665a3e5ca93004fe353a1d6e74f60
+DIST ind.traineddata.gz 836752 RMD160 bc714068a10982dcf32816e823dbca6be63489ee SHA1 f4214ce40c5f6ef92085a8a45e9ff03f7cf7afca SHA256 9b2ee5ab2d33511f5fb8edbaee3d3a448cf8ab9efbf8e5c852d59481317e8218
+DIST ita.traineddata.gz 939956 RMD160 522f4110223c0da1894d39bc49fd1400b1527e2e SHA1 c166ba79256f6e7c1b993b2db7403d794131fe05 SHA256 a6cbaa39fddee521090e48d6a83b6f62e77b5fe7d90ab8ce9ee4d8ada10730f5
+DIST jpn.traineddata.gz 14604738 RMD160 47dba0ff10e9bd6979a31fa1ccc79b7d84775432 SHA1 6d605eee29e76fb841924916bd34095bbbbc45c0 SHA256 ebbde8c607a05cdd97d492734896a24b2aa17d5b4fb00e9597a96b39c1f22aa3
+DIST kor.traineddata.gz 6032090 RMD160 1aa22020b2bcdea7721d111065b2070191149e32 SHA1 37bcd8110a426714f54d99f58b30104b3014ce5a SHA256 8d4709341dbda7da9f42bb1d39c4d22d5ca767c2a30fbe36cb2ad152f092b01f
+DIST lav.traineddata.gz 1018176 RMD160 eb1efa104597850e47cdae8fa70cc4f120959810 SHA1 b4efd308e725d743884f2984f804c82dd5382f63 SHA256 305173b54d836dae2f22e4f488734440a83e683636c033cafe066083738ceebc
+DIST lit.traineddata.gz 1012936 RMD160 65aab59e4be61c3734645f96e688dbd81aa384b1 SHA1 7adbe396a281c0f87c0b95da7e84b5b6029e3dbd SHA256 1a04f9e952a76d430c2b9d16140f2d42f27c72a9bbd55b3e8f2b6e701ef0f399
+DIST nld.traineddata.gz 954151 RMD160 4026b44d7849a0c78d1831e00805f985ffad6421 SHA1 f7e3d46b1747a19158ac0797e859b65c56b5045f SHA256 2826628b0ff22fc3bb5d6e9d6901a39141b805b78084e0c9ab61f12c28747831
+DIST nor.traineddata.gz 951018 RMD160 36ee419e1ba3a49db749f15c5b204bfaee0bc848 SHA1 fb65dede5fbe120823ecdcb0c6cbd1222ae7e245 SHA256 c97cbfd93f676b331296b729d5526d1accc1325474a6b6d91260d03a1c862606
+DIST pol.traineddata.gz 1060352 RMD160 08197fede8151cfdef50a6d2e41c55f384d3f909 SHA1 a303fc31b4b60532b01b4ccdc838f02ff0113f27 SHA256 708e2c59cc4e6451e90fc1ef57b83b809bc354b9e0ef3935e9f181d2a718d5ce
+DIST por.traineddata.gz 911645 RMD160 90a73ffdf23c3ca9cc1b30d5f30943c67f4f59a0 SHA1 883e5e1fa1d991ef6d202951ee9d26a71db181dc SHA256 25df33e4f6c55749d04a5307f1143e31a71a76b7417b91a531c686bf49308b85
+DIST ron.traineddata.gz 929925 RMD160 9755d5002e6dfd581003168bae24bc0697c89318 SHA1 c20c73a2e17f5fe692de0fe9ac681da3984229ae SHA256 367ccb440283e57e4e3f87cd0b97a59a07ceb920e09da8778feafe5e086a9892
+DIST rus.traineddata.gz 848490 RMD160 aeb8ab308499f2414c21f5299f2abf5082c6a282 SHA1 2740accefc45e4ae004269ccb195948b8037a583 SHA256 8ced9431d2b1d544fbdd362c36786e3804451da06093bc45acecad84829e5f7a
+DIST slk.traineddata.gz 1091624 RMD160 4e661fce45076128455f2b2d0e60b93d67a8038d SHA1 16207e26d53504f98a7b1fadcb873dc4611149ec SHA256 037839ad756b9177f7d8f7bd3f01a3a1012094360c5c62a49abbde0a33389511
+DIST slv.traineddata.gz 930221 RMD160 854b6ec39d09ec210a7850d56573f5a77da37b6e SHA1 fbe464cd49d6a7495e6d95600d421aa2dd0b9d77 SHA256 bb7318f24972abc380688c67be86d07193e0294b252b60c648720978ad4a8b04
+DIST spa.traineddata.gz 910992 RMD160 92303810e9429ca5a9daa39e02a015a78ac09cd6 SHA1 7b30950749e84891fdef5f89409c3cf1b6418cd3 SHA256 5de0748b068f35c941e86bba622e23a376a6b084cb094007e7059714f1e030e0
+DIST srp.traineddata.gz 977674 RMD160 70ef247a50a72c5d551f2f7dc246275b9096a9b8 SHA1 47afc601b62998e4cc3f7403d846ba861f30b416 SHA256 542b83f62389ff8cc34746dd765345566ab368b63dcf275c223ecb52c2cb9291
+DIST swe.traineddata.gz 959911 RMD160 7653dd8a57f75a9e240a8e256c7a4b20a2e91040 SHA1 55291e8ea664155ad51db867284c11ad1a1c5d00 SHA256 25331ca1f41378a86336604476049810a0c5350417205e300cb2d11048cec2c1
+DIST tesseract-3.00.tar.gz 3436992 RMD160 82c84c33c414b9196d42f4675251f3ef3ab11b3a SHA1 1d797cae3d0047d7be6c9b17d1973a9afcf6bd81 SHA256 9c32d45f9c5ccf2bd9703a9db1371aaf0fc8f89f2dc536096442b31c3c4c3984
+DIST tgl.traineddata.gz 978138 RMD160 669a4ea7bb6beac425070d8ba424f341eee447a9 SHA1 153ba1d0ddd209e1581d81d42fe5346f748e2f27 SHA256 d4d88e1fb31771d0b42e65291c03da3a167541df7c9682b81d833a4bd6c9e8cf
+DIST tur.traineddata.gz 933401 RMD160 15cebea216b539681d7306cab3f31f2773cd93e2 SHA1 a01da62f3830833b258e2d46ce0f2852571470e6 SHA256 cb4e187f6c25c145252c88a576f3e3c9204c1d77e1c012172a85980c5c01088b
+DIST ukr.traineddata.gz 927741 RMD160 0f040ec98453c38a0f19e90d7f34ab0dee8e9778 SHA1 06ceebfd91fa473d6d91f8a2856c66733bea0131 SHA256 a54f0ce0843c863f102bfe135939200fc18702ce61ae6ebc571ab49460849365
+DIST vie.traineddata.gz 1575539 RMD160 74826cef758cf5a8c561c2e6381e053e151a6a91 SHA1 9158748a63afe87e4e25b5f32c222555f2ad8417 SHA256 5f61c32daf9a7071ff0dc95415aed75276538813398e3debb3849bba70bda713
+EBUILD tesseract-3.00.ebuild 3841 RMD160 18bbff21013c4310eed2eda9a914ae06d91cc890 SHA1 38f9ad80583e7c22f1afa758639ce4b9be12d211 SHA256 0b844c5d076293d4a948087a4f5c36c38e8a54e73db432d13a5da0fb2082b35f
+MISC metadata.xml 534 RMD160 070c47db76580ba3995558b40efc0eb061d309cd SHA1 a04fca213b556a7abe6aef72bd31e47e8879afaa SHA256 3d9cc30fa1c676f60c8fb877139cfb9d29183297d89bf9b0705497aa51e7a1e3
diff --git a/app-text/tesseract/metadata.xml b/app-text/tesseract/metadata.xml
new file mode 100644
index 0000000..1d369fc
--- /dev/null
+++ b/app-text/tesseract/metadata.xml
@@ -0,0 +1,16 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE pkgmetadata SYSTEM "http://www.gentoo.org/dtd/metadata.dtd">
+<pkgmetadata>
+<herd>no-herd</herd>
+<!-- Note, this is different from portage -->
+<maintainer>
+ <email>nirbheek@gentoo.org</email>
+ <name>Nirbheek Chauhan</name>
+</maintainer>
+<longdescription lang="en">
+A commercial quality OCR engine originally developed at HP between 1985 and
+1995. In 1995, this engine was among the top 3 evaluated by UNLV. It was
+open-sourced by HP and UNLV in 2005.
+</longdescription>
+</pkgmetadata>
+
diff --git a/app-text/tesseract/tesseract-3.00.ebuild b/app-text/tesseract/tesseract-3.00.ebuild
new file mode 100644
index 0000000..bb17d93
--- /dev/null
+++ b/app-text/tesseract/tesseract-3.00.ebuild
@@ -0,0 +1,98 @@
+# Copyright 1999-2010 Gentoo Foundation
+# Distributed under the terms of the GNU General Public License v2
+# $Header: /var/cvsroot/gentoo-x86/app-text/tesseract/tesseract-2.04-r1.ebuild,v 1.6 2010/04/16 18:53:16 hwoarang Exp $
+
+EAPI="3"
+
+inherit eutils
+
+DESCRIPTION="An OCR Engine that was developed at HP and now at Google"
+HOMEPAGE="http://code.google.com/p/tesseract-ocr/"
+URI_PREFIX="http://tesseract-ocr.googlecode.com/files"
+SRC_URI="${URI_PREFIX}/${P}.tar.gz
+ ${URI_PREFIX}/eng.traineddata.gz
+ linguas_bg? ( ${URI_PREFIX}/bul.traineddata.gz )
+ linguas_ca? ( ${URI_PREFIX}/cat.traineddata.gz )
+ linguas_cs? ( ${URI_PREFIX}/ces.traineddata.gz )
+ linguas_da? ( ${URI_PREFIX}/dan.traineddata.gz )
+ linguas_de? ( ${URI_PREFIX}/deu.traineddata.gz )
+ linguas_el? ( ${URI_PREFIX}/ell.traineddata.gz )
+ linguas_es? ( ${URI_PREFIX}/spa.traineddata.gz )
+ linguas_fi? ( ${URI_PREFIX}/fin.traineddata.gz )
+ linguas_fr? ( ${URI_PREFIX}/fra.traineddata.gz )
+ linguas_id? ( ${URI_PREFIX}/ind.traineddata.gz )
+ linguas_it? ( ${URI_PREFIX}/ita.traineddata.gz )
+ linguas_hu? ( ${URI_PREFIX}/hun.traineddata.gz )
+ linguas_ja? ( ${URI_PREFIX}/jpn.traineddata.gz )
+ linguas_ko? ( ${URI_PREFIX}/kor.traineddata.gz )
+ linguas_lt? ( ${URI_PREFIX}/lit.traineddata.gz )
+ linguas_lv? ( ${URI_PREFIX}/lav.traineddata.gz )
+ linguas_nl? ( ${URI_PREFIX}/nld.traineddata.gz )
+ linguas_nb? ( ${URI_PREFIX}/nor.traineddata.gz )
+ linguas_pl? ( ${URI_PREFIX}/pol.traineddata.gz )
+ linguas_pt? ( ${URI_PREFIX}/por.traineddata.gz )
+ linguas_ro? ( ${URI_PREFIX}/ron.traineddata.gz )
+ linguas_ru? ( ${URI_PREFIX}/rus.traineddata.gz )
+ linguas_sl? ( ${URI_PREFIX}/slv.traineddata.gz )
+ linguas_sk? ( ${URI_PREFIX}/slk.traineddata.gz )
+ linguas_sr? ( ${URI_PREFIX}/srp.traineddata.gz )
+ linguas_sv? ( ${URI_PREFIX}/swe.traineddata.gz )
+ linguas_tl? ( ${URI_PREFIX}/tgl.traineddata.gz )
+ linguas_tr? ( ${URI_PREFIX}/tur.traineddata.gz )
+ linguas_uk? ( ${URI_PREFIX}/ukr.traineddata.gz )
+ linguas_vi? ( ${URI_PREFIX}/vie.traineddata.gz )
+ linguas_zh_CN? ( ${URI_PREFIX}/chi_sim.traineddata.gz )
+ linguas_zh_TW? ( ${URI_PREFIX}/chi_tra.traineddata.gz )
+"
+
+LICENSE="Apache-2.0"
+SLOT="0"
+KEYWORDS="~alpha ~amd64 ~mips ~ppc ~ppc64 ~sparc ~x86"
+IUSE="examples tiff linguas_bg linguas_ca linguas_cs linguas_da linguas_de
+linguas_el linguas_es linguas_fi linguas_fr linguas_id linguas_it linguas_hu
+linguas_ja linguas_ko linguas_lt linguas_lv linguas_nl linguas_nb linguas_pl
+linguas_pt linguas_ro linguas_ru linguas_sl linguas_sk linguas_sr linguas_sv
+linguas_tl linguas_tr linguas_uk linguas_vi linguas_zh_CN linguas_zh_TW"
+
+RDEPEND="sys-libs/zlib
+ media-libs/libpng
+ virtual/jpeg
+ tiff? ( media-libs/tiff )"
+DEPEND="${RDEPEND}
+ sys-devel/gettext"
+
+# NOTES:
+# English language files are always installed because tesseract uses them by
+# default. That is a tesseract bug, and this workaround should be dropped if
+# possible; see bug 287373.
+# deu-f corresponds to an old German typeface style named Fraktur;
+# it is the same language (German, de).
+# The stuff in directory java/ seems useless...
+# testing/ contains a way to test accuracy, but it is not usable for src_test().
+# app-ocr/ would be a better category.
+
+src_prepare() {
+	# The lowercase java/makefile is obsolete; the install target lives only
+	# in the uppercase Makefile, so delete the stale file before building.
+	local stale_makefile="${S}/java/makefile"
+
+	rm "${stale_makefile}" || die "remove obsolete java makefile failed"
+}
+
+src_configure() {
+ econf $(use_with tiff libtiff) \
+ --enable-gettext \
+ --enable-graphics \
+ --disable-dependency-tracking
+}
+
+src_install() {
+	# Install the build into the image directory. DESTDIR must be ${D}:
+	# the paths configured by econf already carry the (possibly empty)
+	# offset prefix, so using ${ED} here would apply that prefix twice
+	# on Prefix installs.
+	emake DESTDIR="${D}" install || die "emake install failed"
+
+	dodoc AUTHORS ChangeLog NEWS README ReleaseNotes || die "dodoc failed"
+
+	# Install the unpacked training data next to the files placed there by
+	# "make install". insinto/doins creates the directory if needed and
+	# sets regular file permissions, whereas a bare mv would rename a
+	# single file to "tessdata" if the directory did not exist.
+	insinto /usr/share/tessdata
+	doins "${WORKDIR}"/*.traineddata || die "installing training data failed"
+
+	# Sample images are only shipped when USE=examples is enabled.
+	if use examples; then
+		insinto /usr/share/doc/${PF}/examples
+		doins eurotext.tif phototest.tif || die "doins failed"
+	fi
+}