summaryrefslogtreecommitdiff
path: root/graphics/ocrad
diff options
context:
space:
mode:
authorPav Lucistnik <pav@FreeBSD.org>2003-12-31 00:17:41 +0000
committerPav Lucistnik <pav@FreeBSD.org>2003-12-31 00:17:41 +0000
commit8b94b4ebc5eae120364b0df0733f6e8ffcda5047 (patch)
tree0723cf60067434039b4bb64fcd5ad3fbf4f7a660 /graphics/ocrad
parentUpdate to 1.2.4. (diff)
- Update to 0.6
PR: ports/60745 Submitted by: Ulrich Spoerlein <q@uni.de> (maintainer)
Diffstat (limited to 'graphics/ocrad')
-rw-r--r--graphics/ocrad/Makefile22
-rw-r--r--graphics/ocrad/distinfo2
-rw-r--r--graphics/ocrad/files/ocrad.1114
-rw-r--r--graphics/ocrad/files/patch-Makefile.in10
-rw-r--r--graphics/ocrad/files/patch-bitmap.cc34
-rw-r--r--graphics/ocrad/files/patch-character.h10
-rw-r--r--graphics/ocrad/files/patch-configure29
-rw-r--r--graphics/ocrad/files/patch-iso_8859_1.h9
-rw-r--r--graphics/ocrad/files/patch-main.cc21
-rw-r--r--graphics/ocrad/files/patch-textline.cc146
10 files changed, 144 insertions, 253 deletions
diff --git a/graphics/ocrad/Makefile b/graphics/ocrad/Makefile
index 29d747d48b03..a03f13295923 100644
--- a/graphics/ocrad/Makefile
+++ b/graphics/ocrad/Makefile
@@ -6,7 +6,7 @@
#
PORTNAME= ocrad
-PORTVERSION= 0.5
+PORTVERSION= 0.6
CATEGORIES= graphics
MASTER_SITES= ${MASTER_SITE_GNU}
MASTER_SITE_SUBDIR= ${PORTNAME}
@@ -17,17 +17,33 @@ COMMENT= OCR program implemented as filter
USE_BZIP2= yes
HAS_CONFIGURE= yes
USE_GETOPT_LONG= yes
+USE_REINPLACE= yes
CONFIGURE_ARGS= --prefix=${PREFIX}
-MAKE_ENV= CPPFLAGS="${CPPFLAGS}" LDFLAGS="${LDFLAGS}"
+MAKE_ENV= CPPFLAGS="${CPPFLAGS}" LDFLAGS="${LDFLAGS}" \
+ INSTALL_PROGRAM="${INSTALL_PROGRAM}"
DOCS= AUTHORS ChangeLog NEWS README TODO
INFO= ocrad
+MAN1= ocrad.1
+
+STD_PATCH= textline.cc recognize2.cc bitmap.cc main.cc
+
+.include <bsd.port.pre.mk>
+post-patch:
+.if (${OSVERSION} < 500000) && ! (defined(USE_GCC) && ${GCCVERSION} > 30000)
+.for file in ${STD_PATCH}
+ @${REINPLACE_CMD} -e 's/std::isspace/isspace/g; s/std::getc/getc/g' \
+ -e 's/std::ungetc/ungetc/g; s/std::feof/feof/g' \
+ -e 's/std::ferror/ferror/g' ${WRKSRC}/${file}
+.endfor
+.endif
.if !defined(NOPORTDOCS)
post-install:
@${MKDIR} ${DOCSDIR}
cd ${WRKSRC} && ${INSTALL_DATA} ${DOCS} ${DOCSDIR}
+ @${INSTALL_MAN} ${FILESDIR}/ocrad.1 ${PREFIX}/man/man1
.endif
-.include <bsd.port.mk>
+.include <bsd.port.post.mk>
diff --git a/graphics/ocrad/distinfo b/graphics/ocrad/distinfo
index f91e66017830..dcacc5f0109c 100644
--- a/graphics/ocrad/distinfo
+++ b/graphics/ocrad/distinfo
@@ -1 +1 @@
-MD5 (ocrad-0.5.tar.bz2) = 75bdfda680ddeede5dafa523a16c7191
+MD5 (ocrad-0.6.tar.bz2) = ebcefd3512a4f9d870d302167d8b8ec9
diff --git a/graphics/ocrad/files/ocrad.1 b/graphics/ocrad/files/ocrad.1
new file mode 100644
index 000000000000..a99b2fdfdd73
--- /dev/null
+++ b/graphics/ocrad/files/ocrad.1
@@ -0,0 +1,114 @@
+.TH OCRAD 1 "30 December 2003" "0.6" "GNU Ocrad"
+.SH NAME
+ocrad \- Optical Character Recognition
+.SH SYNOPSIS
+.I ocrad
+\-afhivV \-b NUMBER \-l MODE \-o FILE \-x FILE [FILES ...]
+.SH DESCRIPTION
+.LP
+.I ocrad
+is an OCR (Optical Character Recognition) program
+implemented as a filter and based on a feature extraction method. It
+reads a bitmap image in pbm format and outputs text in ISO\-8859\-1
+(Latin\-1) charset. Also includes a layout analyser able to separate
+the columns or blocks of text normally found on printed pages. It can
+be used as a stand\-alone console application, or as a backend to other
+programs.
+.SH OPTIONS
+.TP
+.I "\-a", "\-\-append"
+Append generated text to the output file instead of overwriting it.
+.TP
+.I "\-b NUMBER", "\-\-block=NUMBER"
+Process only the specified text block, beginning from 1.
+This is only useful when used in conjunction with layout analysis (see below).
+.TP
+.I "\-D LEVEL", "\-\-debug=LEVEL"
+The Levels are:
+.nf
+100 - Show raw block list, unordered
+ 99 - Show recursive block list, unordered
+ 98 - Show main block list, unordered
+ 97 - Show recursive block list, ordered
+ 96 - Show main block list, ordered
+ 95..90 - reserved
+ 89 - Show all blocks from every character
+ 88 - Show main black blocks from every character
+ 87 - Show guess list for every character
+ 86 - Show best guess for every character
+.fi
+.TP
+.I "\-f", "\-\-force"
+Force overwrite of output file.
+.TP
+.I "\-h", "\-\-help"
+Print an informative help message describing the options and then exit.
+.TP
+.I "\-i", "\-\-invert"
+Invert image levels (white on black).
+.TP
+.I "\-l MODE", "\-\-layout=MODE"
+Enable page layout analysis. The meaning of
+.I MODE
+is:
+.nf
+`0' no analysis at all,
+`1' column separation,
+`2' full analysis.
+.fi
+.TP
+.I "\-o FILE"
+Place the output into
+.I FILE
+instead of into the standard output.
+.TP
+.I "\-v", "\-\-verbose"
+Verbose mode.
+.TP
+.I "\-V", "\-\-version"
+Print the version number of Ocrad on the standard output and then exit.
+.TP
+.I "\-x FILE"
+Write (export) OCR Results File to
+.I FILE
+\.
+.SH BUGS
+If you find a bug in GNU Ocrad, please send electronic mail to
+<bug-ocrad@gnu.org>. Include the version number, which you can find by
+running `ocrad \-\-version'.
+.SH CAVEATS
+.IP \(bu 2
+Scan directly in b/w mode. Convert from grayscale only if you know what
+you are doing.
+.IP \(bu 2
+For better results the characters should be at least 20 pixels high.
+.IP \(bu 2
+Merged characters are always a problem. Try to avoid them.
+.IP \(bu 2
+Very bold or very light (broken) characters are also a problem.
+.IP \(bu 2
+Always inspect the pbm file with your own eyes before blaming Ocrad for the
+results. Remember the saying, "garbage in, garbage out".
+.SH TODO
+.IP \(bu 2
+Deal with broken characters.
+.IP \(bu 2
+Make a better layout detector. Every character on its line.
+.IP \(bu 2
+Separate (more) merged characters.
+.IP \(bu 2
+Deal better with frames, lines, pictures, etc.
+.IP \(bu 2
+Change to ISO_8859\-15 (update for ISO_8859\-1 with euro sign).
+.IP \(bu 2
+Add an option for recognizing ISO_8859\-9 chars (Turkish).
+.SH GETTING
+.I ocrad
+is available from http://www.gnu.org/software/ocrad/ocrad.html
+.SH AUTHOR
+.nf
+Antonio Diaz <ant_diaz@teleline.es>
+.fi
+.SH HISTORY
+.I ocrad
+0.6 was released in December 2003.
diff --git a/graphics/ocrad/files/patch-Makefile.in b/graphics/ocrad/files/patch-Makefile.in
index 56d780e5a694..dae15cfc0777 100644
--- a/graphics/ocrad/files/patch-Makefile.in
+++ b/graphics/ocrad/files/patch-Makefile.in
@@ -1,8 +1,8 @@
---- Makefile.in.orig Sat Oct 18 01:29:16 2003
-+++ Makefile.in Sun Nov 16 18:18:58 2003
+--- Makefile.in.orig Thu Dec 18 11:11:05 2003
++++ Makefile.in Tue Dec 30 20:20:01 2003
@@ -4,13 +4,14 @@
- DISTNAME = ocrad-0.5
+ DISTNAME = ocrad-0.6
-CXX = g++
-INSTALL = install
@@ -35,8 +35,8 @@
%.o : %.cc
$(CXX) $(CXXFLAGS) -c -o $@ $<
-@@ -42,6 +43,7 @@
- textline.o : block.h character.h iso_8859_1.h textline.h
+@@ -43,6 +44,7 @@
+ recognize2.o : block.h character.h iso_8859_1.h textline.h
textblock.o : block.h character.h textline.h textblock.h
main.o : block.h blockmap.h bitmap.h character.h textline.h textblock.h
+ $(CXX) $(CXXFLAGS) $(CPPFLAGS) -c main.cc
diff --git a/graphics/ocrad/files/patch-bitmap.cc b/graphics/ocrad/files/patch-bitmap.cc
deleted file mode 100644
index 4768997c17b1..000000000000
--- a/graphics/ocrad/files/patch-bitmap.cc
+++ /dev/null
@@ -1,34 +0,0 @@
---- bitmap.cc.orig Sun Nov 16 17:28:14 2003
-+++ bitmap.cc Sun Nov 16 17:33:30 2003
-@@ -24,12 +24,11 @@
- #include "rectangle.h"
- #include "bitmap.h"
-
--
- namespace {
-
- char pbm_getrawbyte( FILE * f ) throw( Bitmap::Error )
- {
-- int ch = std::getc( f );
-+ int ch = getc( f );
-
- if( ch == EOF )
- throw Bitmap::Error( "end-of-file reading pbm file.\n" );
-@@ -58,7 +57,7 @@
- char ch;
- int i = 0;
-
-- do ch = pbm_getc( f ); while( std::isspace( ch ) );
-+ do ch = pbm_getc( f ); while( isspace( ch ) );
- if( !std::isdigit( ch ) )
- throw Bitmap::Error( "junk in pbm file where an integer should be.\n" );
- do { i = (i * 10) + (ch - '0'); ch = pbm_getc( f ); }
-@@ -71,7 +70,7 @@
- {
- char ch;
-
-- do ch = pbm_getc( f ); while( std::isspace( ch ) );
-+ do ch = pbm_getc( f ); while( isspace( ch ) );
-
- if( ch == '0' ) return false;
- if( ch == '1' ) return true;
diff --git a/graphics/ocrad/files/patch-character.h b/graphics/ocrad/files/patch-character.h
deleted file mode 100644
index 5dacbaf8555a..000000000000
--- a/graphics/ocrad/files/patch-character.h
+++ /dev/null
@@ -1,10 +0,0 @@
---- character.h.orig Sun Nov 16 17:40:28 2003
-+++ character.h Sun Nov 16 17:40:44 2003
-@@ -64,6 +64,7 @@
- void swap_guesses( int i, int j ) throw();
- const Guess & guess( int i ) const throw();
- int guesses() const throw() { return _guess.size(); }
-+#undef isalnum
- bool isalnum() const throw();
-
- void join( Character & c ) throw();
diff --git a/graphics/ocrad/files/patch-configure b/graphics/ocrad/files/patch-configure
deleted file mode 100644
index 802fa316e8ee..000000000000
--- a/graphics/ocrad/files/patch-configure
+++ /dev/null
@@ -1,29 +0,0 @@
---- configure.orig Mon Aug 25 00:07:09 2003
-+++ configure Wed Nov 12 22:50:24 2003
-@@ -13,7 +13,7 @@
- while true ; do
-
- # Break out if there are no more args
-- if [ $# == 0 ]; then break ; fi
-+ if [ $# = 0 ]; then break ; fi
-
- # Get the first arg, and shuffle
- option=$1
-@@ -48,7 +48,7 @@
-
- # Find the source files, if location was not specified.
- srcdirtext=
--if [ x${srcdir} == x ]; then
-+if [ x${srcdir} = x ]; then
- srcdirtext="or . or .." ; srcdir=.
- if [ ! -r ${srcdir}/${srctrigger} ] ; then srcdir=.. ; fi
- if [ ! -r ${srcdir}/${srctrigger} ] ; then
-@@ -65,7 +65,7 @@
- fi
-
- # Set srcdir to . if that's what it is.
--if [ $(pwd) == $(cd ${srcdir} ; pwd) ] ; then srcdir=. ; fi
-+if [ $(pwd) = $(cd ${srcdir} ; pwd) ] ; then srcdir=. ; fi
-
- # write variables to config file.
- rm -f Makefile
diff --git a/graphics/ocrad/files/patch-iso_8859_1.h b/graphics/ocrad/files/patch-iso_8859_1.h
index 9d07257db560..9e78ee087ed8 100644
--- a/graphics/ocrad/files/patch-iso_8859_1.h
+++ b/graphics/ocrad/files/patch-iso_8859_1.h
@@ -1,14 +1,15 @@
---- iso_8859_1.h.orig Sun Nov 16 17:14:19 2003
-+++ iso_8859_1.h Sun Nov 16 17:33:51 2003
-@@ -100,6 +100,11 @@
+--- iso_8859_1.h.orig Wed Dec 3 12:12:01 2003
++++ iso_8859_1.h Tue Dec 30 17:12:05 2003
+@@ -100,6 +100,12 @@
static unsigned char base_letter( unsigned char ch ) throw();
static unsigned char compose( unsigned char base_letter,
unsigned char accent ) throw();
+#undef isalnum
+#undef isalpha
++#undef islower
+#undef isupper
+#undef isvowel
+#undef toupper
static bool isalnum( unsigned char ch ) throw();
static bool isalpha( unsigned char ch ) throw();
- static bool isupper( unsigned char ch ) throw();
+ static bool islower( unsigned char ch ) throw();
diff --git a/graphics/ocrad/files/patch-main.cc b/graphics/ocrad/files/patch-main.cc
deleted file mode 100644
index e0497bf2e2f6..000000000000
--- a/graphics/ocrad/files/patch-main.cc
+++ /dev/null
@@ -1,21 +0,0 @@
---- main.cc.orig Sat Oct 18 01:27:29 2003
-+++ main.cc Sun Nov 16 18:16:41 2003
-@@ -28,6 +28,7 @@
- #include <cstring>
- #include <vector>
- #include <getopt.h>
-+#include <libgen.h>
- #include "common.h"
- #include "rectangle.h"
- #include "bitmap.h"
-@@ -289,8 +290,8 @@
- {
- if( infile == stdin )
- {
-- std::ungetc( std::getc( infile ), infile );
-- if( std::feof( infile ) || std::ferror( infile ) ) infile = 0;
-+ ungetc( getc( infile ), infile );
-+ if( feof( infile ) || ferror( infile ) ) infile = 0;
- }
- while( infile != stdin )
- {
diff --git a/graphics/ocrad/files/patch-textline.cc b/graphics/ocrad/files/patch-textline.cc
deleted file mode 100644
index 804a281ab3fa..000000000000
--- a/graphics/ocrad/files/patch-textline.cc
+++ /dev/null
@@ -1,146 +0,0 @@
---- textline.cc.orig Sun Nov 16 17:39:59 2003
-+++ textline.cc Sun Nov 16 17:44:01 2003
-@@ -33,7 +33,7 @@
- for( ; end < characters(); ++end )
- {
- Character & c = character( end );
-- if( c.guesses() && std::isspace( c.guess(0).ch ) ) break;
-+ if( c.guesses() && isspace( c.guess(0).ch ) ) break;
- }
- return end;
- }
-@@ -264,11 +264,11 @@
- if( c1.guesses() == 1 )
- {
- unsigned char ch = c1.guess( 0 ).ch;
-- if( std::isspace( ch ) ) { begin = i + 1 ; continue; }
-+ if( isspace( ch ) ) { begin = i + 1 ; continue; }
- if( ch != 'c' && ch != 'o' && ch != 's' && ch != 'u' && ch != 'v' &&
- ch != 'w' && ch != 'x' && ch != 'z' ) continue;
- if( 4 * c1.height() > 5 * mean_height() )
-- { c1.only_guess( std::toupper( ch ), 0 ); continue; }
-+ { c1.only_guess( toupper( ch ), 0 ); continue; }
- if( 5 * c1.height() < 4 * mean_height() ) continue;
- for( int j = begin; j < characters(); ++j ) if( j != i )
- {
-@@ -276,12 +276,12 @@
- if( c2.guesses() >= 1 )
- {
- unsigned char ch2 = c2.guess( 0 ).ch;
-- if( std::isspace( ch2 ) ) break;
-- if( ( std::isalpha( ch2 ) && 4 * c1.height() > 5 * c2.height() ) ||
-- ( ( std::isupper( ch2 ) || ch2 == 'l' ) && ch2 != 'B' &&
-+ if( isspace( ch2 ) ) break;
-+ if( ( isalpha( ch2 ) && 4 * c1.height() > 5 * c2.height() ) ||
-+ ( ( isupper( ch2 ) || ch2 == 'l' ) && ch2 != 'B' &&
- ( c1.height() >= c2.height() ||
- Ocrad::similar( c1.height(), c2.height(), 10 ) ) ) )
-- { c1.insert_guess( 0, std::toupper( ch ), 1 ); break; }
-+ { c1.insert_guess( 0, toupper( ch ), 1 ); break; }
- }
- }
- }
-@@ -310,7 +310,7 @@
- if( c1.guesses() >= 1 )
- {
- unsigned char ch = c1.guess( 0 ).ch;
-- if( std::isspace( ch ) ) { begin = i + 1 ; continue; }
-+ if( isspace( ch ) ) { begin = i + 1 ; continue; }
- if( ch <= 127 || c1.block_vector().size() < 2 ) continue;
- int chb = ISO_8859_1::base_letter( ch );
- if( chb != 'o' && chb != 'u' ) continue;
-@@ -323,11 +323,11 @@
- unsigned char ch2 = c2.guess( 0 ).ch;
- int ch2b = ISO_8859_1::base_letter( ch2 );
- if( !ch2b && ch2 > 127 ) continue;
-- if( std::isspace( ch2 ) ) break;
-- if( ( std::isalpha( ch2 ) && 4 * b1.height() > 5 * c2.height() ) ||
-- ( std::isupper( ch2 ) && Ocrad::similar( b1.height(), c2.height(), 10 ) ) ||
-- ( std::isalpha( ch2b ) && 4 * c1.height() > 5 * c2.height() ) ||
-- ( std::isupper( ch2b ) && Ocrad::similar( c1.height(), c2.height(), 10 ) ) )
-+ if( isspace( ch2 ) ) break;
-+ if( ( isalpha( ch2 ) && 4 * b1.height() > 5 * c2.height() ) ||
-+ ( isupper( ch2 ) && Ocrad::similar( b1.height(), c2.height(), 10 ) ) ||
-+ ( isalpha( ch2b ) && 4 * c1.height() > 5 * c2.height() ) ||
-+ ( isupper( ch2b ) && Ocrad::similar( c1.height(), c2.height(), 10 ) ) )
- { c1.insert_guess( 0, ISO_8859_1::toupper( ch ), 1 ); break; }
- }
- }
-@@ -341,7 +341,7 @@
- if( c1.guesses() >= 1 )
- {
- unsigned char ch = c1.guess( 0 ).ch;
-- if( std::isspace( ch ) ) { begin = i + 1 ; continue; }
-+ if( isspace( ch ) ) { begin = i + 1 ; continue; }
- if( ch != 'o' && ch != 'O' && ch != 'l' ) continue;
- for( int j = begin; j < characters(); ++j ) if( j != i )
- {
-@@ -349,8 +349,8 @@
- if( c2.guesses() >= 1 )
- {
- unsigned char ch2 = c2.guess( 0 ).ch;
-- if( std::isspace( ch2 ) ) break;
-- if( std::isdigit( ch2 ) )
-+ if( isspace( ch2 ) ) break;
-+ if( isdigit( ch2 ) )
- {
- if( Ocrad::similar( c1.height(), c2.height(), 10 ) )
- c1.insert_guess( 0, (ch == 'l') ? '1' : '0', c1.guess(0).value + 1 );
-@@ -372,7 +372,7 @@
- Character & c2 = character( i );
- if( !c2.guesses() ) continue;
- unsigned char ch = c2.guess( 0 ).ch;
-- if( !std::isalnum( ch ) && ch != '.' && ch != '|' ) continue;
-+ if( !isalnum( ch ) && ch != '.' && ch != '|' ) continue;
- switch( ch )
- {
- case 'g': case 'j': case 'p': case 'q': case 'y':
-@@ -426,14 +426,14 @@
- if( i < characters() - 1 && character( i + 1 ).guesses() )
- rch = character( i + 1 ).guess( 0 ).ch;
- if( ISO_8859_1::isupper( rch ) &&
-- ( !lch || ISO_8859_1::isupper( lch ) || std::isspace( lch ) ) )
-+ ( !lch || ISO_8859_1::isupper( lch ) || isspace( lch ) ) )
- { c.insert_guess( 0, 'I', 1 ); continue; }
- if( ch == 'l' ) continue;
- if( ISO_8859_1::isalpha( lch ) || ISO_8859_1::isalpha( rch ) )
- { c.insert_guess( 0, 'l', 1 ); continue; }
-- if( rch == '|' && ( !lch || std::isspace( lch ) ) &&
-+ if( rch == '|' && ( !lch || isspace( lch ) ) &&
- i < characters() - 2 && character( i + 2 ).guesses() &&
-- std::isalpha( character( i + 2 ).guess( 0 ).ch ) )
-+ isalpha( character( i + 2 ).guess( 0 ).ch ) )
- { c.insert_guess( 0, 'l', 1 ); continue; }
- }
- }
-@@ -475,7 +475,7 @@
- if( c.guesses() )
- {
- unsigned char ch = c.guess( 0 ).ch;
-- if( std::isspace( ch ) ) { begin = i + 1 ; continue; }
-+ if( isspace( ch ) ) { begin = i + 1 ; continue; }
- if( i == begin && ch == 'a' && c.guesses() == 2 &&
- c.guess( 1 ).ch == 'Q' && 4 * c.height() > 5 * mean_height() )
- c.swap_guesses( 0, 1 );
-@@ -501,7 +501,7 @@
- if( c1.guesses() )
- {
- unsigned char ch = c1.guess(0).ch;
-- if( std::isspace( ch ) ) { begin = i + 1 ; continue; }
-+ if( isspace( ch ) ) { begin = i + 1 ; continue; }
- if( c1.guesses() != 2 || ch != 'B' || c1.guess(1).ch != 'a' ) continue;
- if( 4 * c1.height() > 5 * mean_height() ) continue;
- for( int j = begin; j < characters(); ++j ) if( j != i )
-@@ -510,9 +510,9 @@
- if( c2.guesses() >= 1 )
- {
- unsigned char ch2 = c2.guess(0).ch;
-- if( std::isspace( ch2 ) ) break;
-- if( ( std::isalpha( ch2 ) && 5 * c1.height() < 4 * c2.height() ) ||
-- ( std::islower( ch2 ) &&
-+ if( isspace( ch2 ) ) break;
-+ if( ( isalpha( ch2 ) && 5 * c1.height() < 4 * c2.height() ) ||
-+ ( islower( ch2 ) &&
- ( c1.height() <= c2.height() ||
- Ocrad::similar( c1.height(), c2.height(), 10 ) ) ) )
- { c1.swap_guesses( 0, 1 ); break; }