summaryrefslogtreecommitdiff
blob: efa9df5d709525ec45fb4a85fec35e7d06108eb4 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
# Copyright 1999-2018 Gentoo Foundation
# Distributed under the terms of the GNU General Public License v2

EAPI=6

MY_PN="tesseract-ocr"
LANGPACKV="4.00"
URI_PREFIX="https://github.com/${MY_PN}/tessdata/raw/${LANGPACKV}/"
JAVA_PKG_OPT_USE="scrollview"

inherit autotools git-r3 java-pkg-opt-2 toolchain-funcs

DESCRIPTION="An OCR Engine, orginally developed at HP, now open source."
HOMEPAGE="https://github.com/tesseract-ocr"
SRC_URI="${URI_PREFIX}eng.traineddata -> eng.traineddata-${LANGPACKV}
	math? ( ${URI_PREFIX}equ.traineddata -> equ.traineddata-${LANGPACKV} )
	osd? ( ${URI_PREFIX}osd.traineddata -> osd.traineddata-${LANGPACKV} )"
EGIT_REPO_URI="https://github.com/${MY_PN}/${PN}.git"

LICENSE="Apache-2.0"
SLOT="0"
KEYWORDS=""
IUSE="doc jpeg math opencl openmp osd png scrollview static-libs tiff training webp"

# List of supported Gentoo linguas and their upstream mapping
# https://github.com/tesseract-ocr/tesseract/wiki/Data-Files
# "old" variants were regrouped in the matching modern locale
LANGUAGES="af:afr am:amh ar:ara as:asm az:aze,aze_cyrl be:bel bn:ben bo:bod bs:bos bg:bul ca:cat cs:ces zh:chi_sim,chi_tra cy:cym da:dan de:deu,frk dz:dzo el:ell,grc en:enm eo:epo et:est eu:eus fa:fas fi:fin fr:fra,frm ga:gle gl:glg gu:guj he:heb hi:hin hr:hrv hu:hun id:ind is:isl it:ita,ita_old ja:jpn kn:kan ka:kat,kat_old kk:kaz km:khm ky:kir ko:kor ku:kur lo:lao la:lat lv:lav lt:lit ml:mal mr:mar mk:mkd ms:msa my:mya ne:nep nl:nld no:nor or:ori pa:pan pl:pol pt:por ro:ron ru:rus sa:san si:sin sk:slk sl:slv es:spa,spa_old sq:sqi sr:srp,srp_latn sw:swa sv:swe syc:syr ta:tam te:tel tg:tgk tl:tgl th:tha tr:tur ug:uig uk:ukr uz:uzb,uzb_cyrl vi:vie"
# Missing matches:
#	ceb 	Cebuano
#	chr 	Cherokee
#	hat 	Haitian; Haitian Creole
#	iku 	Inuktitut
#	jav 	Javanese
#	mlt 	Maltese
#	pus 	Pushto; Pashto
#	tir 	Tigrinya
#	urd 	Urdu
#	yid 	Yiddish
# l10n_en provides the additional data:
#	enm 	English, Middle (1100-1500)

for lang in ${LANGUAGES}; do
	gentoo_lang=${lang%:*}
	tess_langs=${lang#*:}
	for tess_lang in ${tess_langs//,/ }; do
		SRC_URI+=" l10n_${gentoo_lang}? ( ${URI_PREFIX}${tess_lang}.traineddata -> ${tess_lang}.traineddata-${LANGPACKV} )"
	done
	IUSE+=" l10n_${gentoo_lang}"
done

# With opencl USE=tiff is necessary in leptonica
CDEPEND=">=media-libs/leptonica-1.74:=[zlib,tiff?,jpeg?,png?,webp?]
	opencl? (
		virtual/opencl
		media-libs/tiff:0=
		media-libs/leptonica:=[tiff]
	)
	scrollview? (
		>=dev-java/piccolo2d-3.0:0
	)
	training? (
		dev-libs/icu:=
		x11-libs/pango:=
		x11-libs/cairo:=
	)"

DEPEND="${CDEPEND}
	doc? ( app-doc/doxygen )
	scrollview? ( >=virtual/jdk-1.7 )"

RDEPEND="${CDEPEND}
	scrollview? ( >=virtual/jre-1.7 )"

DOCS=( AUTHORS ChangeLog README.md )

PATCHES=(
	"${FILESDIR}/${PN}-4.00.00-use-system-piccolo2d.patch"
)

pkg_pretend() {
	[[ ${MERGE_TYPE} != binary ]] && use openmp && tc-check-openmp
}

pkg_setup() {
	[[ ${MERGE_TYPE} != binary ]] && use openmp && tc-check-openmp
}

src_unpack() {
	git-r3_src_unpack
	for file in ${A}; do
		if [[ "${file}" == *traineddata* ]]; then
			cp "${DISTDIR}/${file}" "${S}/tessdata/${file%-*}" || die
		fi
	done
}

src_prepare() {
	default
	eautoreconf

	java-pkg-opt-2_src_prepare
}

src_configure() {
	local myeconfargs=(
		--enable-shared
		$(use_enable opencl)
		$(use_enable openmp)
		$(use_enable scrollview graphics)
		$(use_enable static-libs static)
	)

	econf "${myeconfargs[@]}"
}

src_compile() {
	default
	use doc && emake doc
	use scrollview && emake ScrollView.jar JAVAC="javac $(java-pkg_javac-args)"
	use training && emake training
}

src_install() {
	use doc && HTML_DOCS=( doc/html/. )
	default
	prune_libtool_files

	if use training; then
		emake DESTDIR="${D}" training-install
	fi

	insinto /usr/share/tessdata
	doins tessdata/*traineddata* # language files
	use scrollview && doins java/ScrollView.jar # scrollview
}