summary | refs | log | tree | commit | diff
path: root/graphics/ocropus
diff options
context:
space:
mode:
author: Dmitry Marakasov <amdmi3@FreeBSD.org> 2009-06-17 22:33:19 +0000
committer: Dmitry Marakasov <amdmi3@FreeBSD.org> 2009-06-17 22:33:19 +0000
commit: 132ab536a33a33044350259cbeecca9fb6bbb968 (patch)
tree: 1f021bd3037ae5a8a01fee64a51f2eda1d36c23d /graphics/ocropus
parent: - Properly support SDL (diff)
- Update to 0.4
PR: 135665
Submitted by: Hiroto Kagotani <hiroto.kagotani@gmail.com> (maintainer)
Notes
Notes: svn path=/head/; revision=236171
Diffstat (limited to 'graphics/ocropus')
-rw-r--r-- graphics/ocropus/Makefile 24
-rw-r--r-- graphics/ocropus/distinfo 6
-rw-r--r-- graphics/ocropus/files/patch-Makefile.am 11
-rw-r--r-- graphics/ocropus/files/patch-configure.ac 31
-rw-r--r-- graphics/ocropus/files/patch-genAM.py 20
-rw-r--r-- graphics/ocropus/files/patch-ocr-utils__components.cc 10
-rw-r--r-- graphics/ocropus/files/patch-ocr-utils__narray-io.h 6
-rw-r--r-- graphics/ocropus/files/patch-ocroscript__scripts__rec-tess-complete.lua 96
-rw-r--r-- graphics/ocropus/pkg-plist 141
9 files changed, 114 insertions, 231 deletions
diff --git a/graphics/ocropus/Makefile b/graphics/ocropus/Makefile
index 5ecb3206249d..abc227ea53f8 100644
--- a/graphics/ocropus/Makefile
+++ b/graphics/ocropus/Makefile
@@ -6,29 +6,31 @@
#
PORTNAME= ocropus
-PORTVERSION= 0.3.1
+PORTVERSION= 0.4
CATEGORIES= graphics
MASTER_SITES= ${MASTER_SITE_GOOGLE_CODE}
MAINTAINER= hiroto.kagotani@gmail.com
COMMENT= The OCRopus(tm) open source document analysis and OCR system
-BUILD_DEPENDS= ${LOCALBASE}/lib/libtesseract_full.a:${PORTSDIR}/graphics/tesseract
LIB_DEPENDS= png.5:${PORTSDIR}/graphics/png \
jpeg.9:${PORTSDIR}/graphics/jpeg \
tiff.4:${PORTSDIR}/graphics/tiff \
- iulib.0:${PORTSDIR}/graphics/iulib
+ iulib.0:${PORTSDIR}/graphics/iulib \
+ gsl.13:${PORTSDIR}/math/gsl
-WRKSRC= ${WRKDIR}/ocropus-0.3
+WRKSRC= ${WRKDIR}/ocropus-0.4/ocropus
OPTIONS= SDL "Enable SDL for graphical debugging" off \
LEPTONICA "Enable Leptonica image analysis" off
MAKE_JOBS_UNSAFE= yes
+USE_PYTHON_BUILD= yes
USE_AUTOTOOLS= aclocal:110 automake:110 autoconf:262
USE_GMAKE= yes
-CONFIGURE_ARGS= --without-fst
+CONFIGURE_ARGS= --without-fst --without-tesseract --with-iulib=${LOCALBASE}
CONFIGURE_ENV= CPPFLAGS=-I${LOCALBASE}/include LDFLAGS=-L${LOCALBASE}/lib
+USE_GCC= 4.2+
.include <bsd.port.pre.mk>
@@ -40,12 +42,20 @@ USE_SDL= sdl
.if !defined(WITH_LEPTONICA)
CONFIGURE_ARGS+=--without-leptonica
-PLIST_SUB+= LEPTONICA="@comment "
.else
LIB_DEPENDS+= lept.0:${PORTSDIR}/graphics/leptonlib
-PLIST_SUB+= LEPTONICA=""
.endif
+pre-configure:
+ @(cd ${CONFIGURE_WRKSRC} \
+ && ${SETENV} ${PYTHON_CMD} genAM.py >Makefile.am)
+
run-autotools: run-autotools-aclocal run-autotools-automake run-autotools-autoconf
+run-autotools-autoconf:
+ @(cd ${CONFIGURE_WRKSRC} && ${SETENV} ${AUTOTOOLS_ENV} ${AUTOCONF} \
+ ${AUTOCONF_ARGS})
+ @${REINPLACE_CMD} -e 's|-lpthread|${PTHREAD_LIBS}|'\
+ ${WRKSRC}/configure
+
.include <bsd.port.post.mk>
diff --git a/graphics/ocropus/distinfo b/graphics/ocropus/distinfo
index 4cb3801b19eb..37903742d817 100644
--- a/graphics/ocropus/distinfo
+++ b/graphics/ocropus/distinfo
@@ -1,3 +1,3 @@
-MD5 (ocropus-0.3.1.tar.gz) = 2a1b66419ae69ef031d5e6269db15bb5
-SHA256 (ocropus-0.3.1.tar.gz) = ee02d209a1c823090f0bceba7ec4a884029f66fc44147a2d34922f8148a699df
-SIZE (ocropus-0.3.1.tar.gz) = 12061574
+MD5 (ocropus-0.4.tar.gz) = d883ee9c9fd63bfdd42b25992f63c9ed
+SHA256 (ocropus-0.4.tar.gz) = 2a564916d05e1badf1a527cc59015f3397e57f7ab9dc1269781f046f87d1b8ff
+SIZE (ocropus-0.4.tar.gz) = 21311501
diff --git a/graphics/ocropus/files/patch-Makefile.am b/graphics/ocropus/files/patch-Makefile.am
deleted file mode 100644
index 37b04f6fd88a..000000000000
--- a/graphics/ocropus/files/patch-Makefile.am
+++ /dev/null
@@ -1,11 +0,0 @@
---- ./Makefile.am.orig 2008-10-16 05:40:47.000000000 +0900
-+++ ./Makefile.am 2009-05-26 21:25:34.000000000 +0900
-@@ -110,7 +110,7 @@
-
- # run check-style everytime and give a hint about make check
- all:
-- $(srcdir)/utilities/check-style -f $(srcdir)
-+# $(srcdir)/utilities/check-style -f $(srcdir)
- @echo
- @echo "Use 'make check' to run tests!"
- @echo
diff --git a/graphics/ocropus/files/patch-configure.ac b/graphics/ocropus/files/patch-configure.ac
index 1995713ab60d..49821662f843 100644
--- a/graphics/ocropus/files/patch-configure.ac
+++ b/graphics/ocropus/files/patch-configure.ac
@@ -1,6 +1,6 @@
---- ./configure.ac.orig 2008-10-16 05:40:35.000000000 +0900
-+++ ./configure.ac 2009-05-26 21:22:11.000000000 +0900
-@@ -116,6 +116,8 @@
+--- ./configure.ac.orig 2009-06-01 05:18:41.000000000 +0900
++++ ./configure.ac 2009-06-17 19:47:20.000000000 +0900
+@@ -114,11 +114,19 @@
AC_MSG_ERROR([no TIFFOpen; please install libtiff4-dev or equivalent]))
AC_LANG_CPLUSPLUS
@@ -9,12 +9,33 @@
# --- iulib (required) ---
# NB: we can only use functions with C linkage here
-@@ -180,7 +182,7 @@
+ AC_CHECK_LIB(iulib, exit,,
+- AC_MSG_ERROR([no iulib; please install iulib first (see INSTALL)]))
++ AC_CHECK_LIB(avcodec, avcodec_open, ,
++ AC_MSG_ERROR([no iulib; please install iulib first (see INSTALL)]))
++ AC_CHECK_LIB(avformat, url_fopen, ,
++ AC_MSG_ERROR([no iulib; please install iulib first (see INSTALL)]))
++ AC_CHECK_LIB(iulib, sleep, ,
++ AC_MSG_ERROR([no iulib; please install iulib first (see INSTALL)]))
++)
+
+ # --- libpthread (needed by tesseract) ---
+ AC_CHECK_LIB(pthread, pthread_create,,)
+@@ -176,14 +184,14 @@
LDFLAGS="$LDFLAGS -L$leptheaders/../../lib"
AC_CHECK_LIB(lept,pixCreate,,AC_MSG_ERROR([leptonica not found! Choose --without-leptonica if you don't want to use it.]))
fi
-AM_CONDITIONAL([use_leptonica], [test x$use_leptonica == xyes])
+AM_CONDITIONAL([use_leptonica], [test x$use_leptonica = xyes])
+ # --- GSL (optional for glinerec) ---
+-AC_SUBST(use_gsl, "yes")
++AC_SUBST(use_gsl, "no")
+ AC_CHECK_LIB(gslcblas, abort,,AC_SUBST(use_gsl, "no"))
+ AC_CHECK_LIB(gsl, gsl_error,,AC_SUBST(use_gsl, "no"))
+ AC_CHECK_LIB(blas, exit,,AC_SUBST(use_gsl, "no"))
+-AM_CONDITIONAL([use_gsl], [test x$use_gsl == xyes])
++AM_CONDITIONAL([use_gsl], [test x$use_gsl = xyes])
+
- # --- SDL (optional for graphical debugging in ocroscript) ---
+ # --- SDL (optional for graphical debugging) ---
diff --git a/graphics/ocropus/files/patch-genAM.py b/graphics/ocropus/files/patch-genAM.py
new file mode 100644
index 000000000000..b7bbdc0bcaad
--- /dev/null
+++ b/graphics/ocropus/files/patch-genAM.py
@@ -0,0 +1,20 @@
+--- ./genAM.py.orig 2009-06-01 05:18:41.000000000 +0900
++++ ./genAM.py 2009-06-17 19:44:58.000000000 +0900
+@@ -62,7 +62,7 @@
+ ocropusincludedir=$(includedir)/ocropus
+
+ AM_CPPFLAGS = -I$(srcdir)/include -I$(srcdir)/ocr-utils \
+--I@iulibheaders@ -I@tessheaders@
++-I@iulibheaders@
+
+ AM_LDFLAGS =
+
+@@ -180,7 +180,7 @@
+ print """
+ # run check-style everytime and give a hint about make check
+ all:
+- $(srcdir)/utilities/check-style -f $(srcdir)
++# $(srcdir)/utilities/check-style -f $(srcdir)
+ @echo
+ @echo "Use 'make check' to run tests!"
+ @echo
diff --git a/graphics/ocropus/files/patch-ocr-utils__components.cc b/graphics/ocropus/files/patch-ocr-utils__components.cc
new file mode 100644
index 000000000000..d4cb3ab43ca3
--- /dev/null
+++ b/graphics/ocropus/files/patch-ocr-utils__components.cc
@@ -0,0 +1,10 @@
+--- ./ocr-utils/components.cc.orig 2009-06-01 05:18:41.000000000 +0900
++++ ./ocr-utils/components.cc 2009-06-17 19:44:58.000000000 +0900
+@@ -26,6 +26,7 @@
+ #include "colib/colib.h"
+ #include "iulib/iulib.h"
+ #include "components.h"
++extern char **environ;
+
+ using namespace colib;
+
diff --git a/graphics/ocropus/files/patch-ocr-utils__narray-io.h b/graphics/ocropus/files/patch-ocr-utils__narray-io.h
index 91adc25b6442..74c65c051d60 100644
--- a/graphics/ocropus/files/patch-ocr-utils__narray-io.h
+++ b/graphics/ocropus/files/patch-ocr-utils__narray-io.h
@@ -1,10 +1,10 @@
---- ./ocr-utils/narray-io.h.orig 2008-10-16 05:40:46.000000000 +0900
-+++ ./ocr-utils/narray-io.h 2009-05-26 21:22:11.000000000 +0900
+--- ./ocr-utils/narray-io.h.orig 2009-06-01 05:18:41.000000000 +0900
++++ ./ocr-utils/narray-io.h 2009-06-17 19:44:58.000000000 +0900
@@ -31,6 +31,7 @@
#include <stdio.h>
#include <stdlib.h>
+#include <stdint.h>
- #include "colib.h"
+ #include "ocropus.h"
namespace ocropus {
diff --git a/graphics/ocropus/files/patch-ocroscript__scripts__rec-tess-complete.lua b/graphics/ocropus/files/patch-ocroscript__scripts__rec-tess-complete.lua
deleted file mode 100644
index d428d7d718cb..000000000000
--- a/graphics/ocropus/files/patch-ocroscript__scripts__rec-tess-complete.lua
+++ /dev/null
@@ -1,96 +0,0 @@
---- ./ocroscript/scripts/rec-tess-complete.lua.orig 2008-10-16 05:40:35.000000000 +0900
-+++ ./ocroscript/scripts/rec-tess-complete.lua 2009-05-26 21:22:11.000000000 +0900
-@@ -20,11 +20,20 @@
- -- Reviewer:
- -- Primary Repository:
- -- Web Sites: www.iupr.org, www.dfki.de, www.ocropus.org
-+--
-+-- Patch applied:
-+-- http://code.google.com/p/ocropus/issues/detail?id=137
-
-
- require 'lib.util'
- require 'lib.headings'
- require 'lib.paragraphs'
-+require 'lib.path'
-+require 'lib.hocr'
-+import_all(ocr)
-+import_all(graphics)
-+import_all(iulib)
-+import_all(nustring)
-
- remove_hyphens = true
-
-@@ -74,7 +83,7 @@
- -- RecognizedPage is a transport object of tesseract_recognize_blockwise().
- -- This function will convert it to a DOM.
- function convert_RecognizedPage_to_DOM(p, image_path, keep_char_boxes)
-- page_DOM = get_page_DOM(p, image_path)
-+ page_DOM = hocr.get_page_DOM(p, image_path)
- for i = 0, p:linesCount() - 1 do
- local bbox = p:bbox(i)
- local text = nustring()
-@@ -85,13 +94,12 @@
- bboxes = narray_to_table(r)
- end
- p:text(text, i)
-- line_DOM = get_line_DOM(bbox, text, bboxes, p)
-+ line_DOM = hocr.get_line_DOM(bbox, text, bboxes, p)
- table.insert(page_DOM, line_DOM)
- end
- return page_DOM
- end
-
--
- function get_images_DOM(tiseg_image, html_path, images_dir, page_image)
- os.execute('mkdir -p "'..images_dir..'"')
- local rects = rectarray()
-@@ -102,12 +110,11 @@
- local dom = {{tag = 'hr', size = '0'}}
- for i = 0, rects:length() - 1 do
- local src = images_dir .. ('/%04d.png'):format(i + 1)
-- local img_path = util.combine_paths(html_path, src)
- img = bytearray()
- r = rects:at(i)
- extract_subimage(img, page_image, r.x0, r.y0, r.x1, r.y1)
-- write_image_gray(img_path, img)
-- local props = {bbox = bbox_to_string(page_image, r)}
-+ iulib.write_image_gray(src, img)
-+ local props = {bbox = hocr.bbox_to_string(page_image, r)}
- local link = {tag = 'a', href=src}
- local width = r.x1 - r.x0
- local height = r.y1 -r.y0
-@@ -119,7 +126,7 @@
- height = "200px"
- end
- local tag = {tag = 'img', src = src, width=width, height=height,
-- class = 'ocr_image', title = hocr_properties_attribute(props)}
-+ class = 'ocr_image', title = hocr.properties_attribute(props)}
- table.insert(link, tag)
- table.insert(dom, link)
- table.insert(dom, '\n')
-@@ -146,8 +153,8 @@
- get_nontext_mask(nontext_mask,tiseg_image)
- remove_masked_region(text_image,nontext_mask,clean_image)
- segmenter:segment(page_segmentation,text_image)
-- local p = RecognizedPage()
-- tesseract_recognize_blockwise(p, page_image, page_segmentation)
-+ local p = tesseract.RecognizedPage()
-+ tesseract.recognize_blockwise(p, page_image, page_segmentation)
- page_DOM = convert_RecognizedPage_to_DOM(p, pages:getFileName(),
- option("charboxes"))
- page_DOM = detect_headings(page_DOM, page_image)
-@@ -157,10 +164,10 @@
- table.insert(body_DOM, page_DOM)
- end
- --end
--doc_DOM = get_html_tag()
--table.insert(doc_DOM, get_head_tag())
-+doc_DOM = hocr.get_html_tag()
-+table.insert(doc_DOM, hocr.get_head_tag())
- table.insert(doc_DOM, '\n')
- table.insert(doc_DOM, body_DOM)
- file = io.open(output_file, 'w')
--dump_DOM(file, doc_DOM, html_preamble)
-+hocr.dump(file, doc_DOM, html_preamble)
- file:close()
diff --git a/graphics/ocropus/pkg-plist b/graphics/ocropus/pkg-plist
index c1fa889c9bd0..97b1fc0fc829 100644
--- a/graphics/ocropus/pkg-plist
+++ b/graphics/ocropus/pkg-plist
@@ -1,119 +1,48 @@
-bin/ocroscript
lib/libocropus.a
-lib/libocroscript.a
-include/ocropus/extern.h
-include/ocropus/read_image.h
-include/ocropus/function.h
-include/ocropus/defs.h
-include/ocropus/voronoi-ocropus.h
-include/ocropus/const.h
-include/ocropus/beam-search.h
-include/ocropus/langmod-shortest-path.h
-include/ocropus/lattice.h
-include/ocropus/ocr-binarize-sauvola.h
-include/ocropus/ocr-binarize-otsu.h
-include/ocropus/grouping.h
-include/ocropus/make-garbage.h
-include/ocropus/charlib.h
-include/ocropus/feature-extractor.h
-include/ocropus/confusion-matrix.h
-include/ocropus/bpnet.h
-include/ocropus/additions.h
-include/ocropus/feature-stream.h
-include/ocropus/mnist.h
-include/ocropus/classmap.h
-include/ocropus/classify-chars.h
-include/ocropus/bpnetline.h
-include/ocropus/ocr-deskew-rast.h
-include/ocropus/ocr-noisefilter.h
-include/ocropus/ocr-doc-clean.h
-include/ocropus/ocr-doc-clean-concomp.h
-include/ocropus/ocr-pageframe-rast.h
-include/ocropus/ocr-whitespace-cover.h
-include/ocropus/ocr-char-stats.h
-include/ocropus/ocr-layout-rast.h
+bin/ocr-distance
+bin/ocropus
+include/ocropus/ocr-openfst.h
+include/ocropus/glclass.h
include/ocropus/line-info.h
-include/ocropus/ocr-extract-gutters.h
-include/ocropus/ocr-ctextline-rast.h
-include/ocropus/ocr-ctextline-rast-extended.h
-include/ocropus/ocr-classify-zones.h
-include/ocropus/ocr-pageseg-wcuts.h
-include/ocropus/ocr-text-image-seg.h
-include/ocropus/log-reg-data.h
-include/ocropus/ocr-pageseg-xycut.h
-include/ocropus/ocr-word-segmentation.h
-include/ocropus/ocrcomponents.h
-include/ocropus/kmeans.h
+include/ocropus/glcuts.h
+include/ocropus/tesseract.h
+include/ocropus/ocr-pfst.h
+include/ocropus/glfmaps.h
+include/ocropus/gldataset.h
+include/ocropus/glutils.h
+include/ocropus/grouper.h
+include/ocropus/ocropus.h
+include/ocropus/gsl.h
include/ocropus/glinerec.h
-include/ocropus/idmap.h
+include/ocropus/ocr-layout.h
include/ocropus/narray-io.h
-include/ocropus/ocr-segmentations.h
-include/ocropus/eigens.h
-include/ocropus/sysutil.h
+include/ocropus/queue.h
+include/ocropus/grid.h
include/ocropus/logger.h
-include/ocropus/grouper.h
-include/ocropus/segmentation.h
include/ocropus/pages.h
-include/ocropus/didegrade.h
-include/ocropus/lines.h
-include/ocropus/regionextractor.h
-include/ocropus/ocr-utils.h
+include/ocropus/linesegs.h
+include/ocropus/components.h
+include/ocropus/stringutil.h
+include/ocropus/init-ocropus.h
+include/ocropus/editdist.h
+include/ocropus/ocrinterfaces.h
+include/ocropus/arraypaint.h
+include/ocropus/narray-binio.h
+include/ocropus/sysutil.h
+include/ocropus/segmentation.h
include/ocropus/resource-path.h
-include/ocropus/queue.h
-include/ocropus/grid.h
-include/ocropus/seg-eval.h
+include/ocropus/xml-entities.h
+include/ocropus/pagesegs.h
+include/ocropus/ocr-utils.h
+include/ocropus/didegrade.h
+include/ocropus/docproc.h
include/ocropus/enumerator.h
-include/ocropus/editdist.h
-include/ocropus/tesseract.h
-include/ocropus/recognized-page.h
-%%LEPTONICA%%include/ocropus/ocr-text-image-seg-leptonica.h
-%%DATADIR%%/models/neural-net-file.nn
+%%DATADIR%%/models/default.fst.gz
+%%DATADIR%%/models/default.model
+%%DATADIR%%/models/latin3-full.model
+%%DATADIR%%/models/ocr-dict-case.fst.gz
%%DATADIR%%/words/en-us
-%%DATADIR%%/scripts/lib/align.lua
-%%DATADIR%%/scripts/lib/datasets.lua
-%%DATADIR%%/scripts/lib/editdist.lua
-%%DATADIR%%/scripts/lib/getopt.lua
-%%DATADIR%%/scripts/lib/headings.lua
-%%DATADIR%%/scripts/lib/hocr.lua
-%%DATADIR%%/scripts/lib/paragraphs.lua
-%%DATADIR%%/scripts/lib/path.lua
-%%DATADIR%%/scripts/lib/util.lua
-%%DATADIR%%/scripts/lib/xml.lua
-%%DATADIR%%/scripts/align-lines-wordwise.lua
-%%DATADIR%%/scripts/align-transcription.lua
-%%DATADIR%%/scripts/align.lua
-%%DATADIR%%/scripts/build-ngram-model.lua
-%%DATADIR%%/scripts/check-train-valid-bpnet-feature.lua
-%%DATADIR%%/scripts/degrade.lua
-%%DATADIR%%/scripts/deskew.lua
-%%DATADIR%%/scripts/editdist.lua
-%%DATADIR%%/scripts/erode3.lua
-%%DATADIR%%/scripts/eval-bpnet-on-words.lua
-%%DATADIR%%/scripts/eval-editdist-layout.lua
-%%DATADIR%%/scripts/eval-on-word-list.lua
-%%DATADIR%%/scripts/hocr-to-text.lua
-%%DATADIR%%/scripts/line-clean.lua
-%%DATADIR%%/scripts/matra-clipping.lua
-%%DATADIR%%/scripts/rec-bpnet-isolated.lua
-%%DATADIR%%/scripts/rec-bpnet.lua
-%%DATADIR%%/scripts/rec-guided.lua
-%%DATADIR%%/scripts/rec-line.lua
-%%DATADIR%%/scripts/rec-ltess.lua
-%%DATADIR%%/scripts/rec-minimal.lua
-%%DATADIR%%/scripts/rec-tess-complete.lua
-%%DATADIR%%/scripts/recognize.lua
-%%DATADIR%%/scripts/reflow.lua
-%%DATADIR%%/scripts/sauvola.lua
-%%DATADIR%%/scripts/segment-line.lua
-%%DATADIR%%/scripts/show.lua
-%%DATADIR%%/scripts/showseg.lua
-%%DATADIR%%/scripts/strict.lua
-%%DATADIR%%/scripts/text-to-hocr.lua
-%%DATADIR%%/scripts/train-bpnet-isolated.lua
-%%DATADIR%%/scripts/train-bpnet-lines.lua
@dirrm include/ocropus
@dirrm %%DATADIR%%/models
-@dirrm %%DATADIR%%/scripts/lib
-@dirrmtry %%DATADIR%%/scripts
@dirrm %%DATADIR%%/words
@dirrmtry %%DATADIR%%