summaryrefslogtreecommitdiff
path: root/textproc/srilm
diff options
context:
space:
mode:
authorYen-Ming Lee <leeym@FreeBSD.org>2004-02-21 18:07:23 +0000
committerYen-Ming Lee <leeym@FreeBSD.org>2004-02-21 18:07:23 +0000
commit089ae7bb8b8f3128b695bea366cde9f8f0230481 (patch)
tree24867814681c18c76f21c6c477a43e5a9aab8a4b /textproc/srilm
parentUpdate to 1.12.1. (diff)
SRILM is a toolkit for building and applying statistical language models (LMs),
primarily for use in speech recognition, statistical tagging and segmentation. It has been under development in the SRI Speech Technology and Research Laboratory since 1995. The toolkit has also greatly benefitted from its use and enhancements during the Johns Hopkins University/CLSP summer workshops in 1995, 1996, and 1997. SRILM consists of the following components: * A set of C++ class libraries implementing language models, supporting data structures and miscellaneous utility functions. * A set of executable programs built on top of these libraries to perform standard tasks such as training LMs and testing them on data, tagging or segmenting text, etc. * A collection of miscellaneous scripts facilitating minor related tasks. WWW: http://www.speech.sri.com/projects/srilm/ Author: stolcke@speech.sri.com PR: 60810 Submitted by: Cheng-Lung Sung <clsung@dragon2.net>
Notes
Notes: svn path=/head/; revision=101586
Diffstat (limited to 'textproc/srilm')
-rw-r--r--textproc/srilm/Makefile105
-rw-r--r--textproc/srilm/distinfo2
-rw-r--r--textproc/srilm/files/patch-Makefile.common.variables11
-rw-r--r--textproc/srilm/files/patch-Makefile.machine.i38672
-rw-r--r--textproc/srilm/files/patch-bin-machine-type11
-rw-r--r--textproc/srilm/files/patch-go.run-test12
-rw-r--r--textproc/srilm/files/patch-lattice-src-Makefile11
-rw-r--r--textproc/srilm/files/patch-lm-src-Makefile11
-rw-r--r--textproc/srilm/pkg-descr19
-rw-r--r--textproc/srilm/pkg-message8
-rw-r--r--textproc/srilm/pkg-plist84
11 files changed, 346 insertions, 0 deletions
diff --git a/textproc/srilm/Makefile b/textproc/srilm/Makefile
new file mode 100644
index 000000000000..f3e8e3fb4475
--- /dev/null
+++ b/textproc/srilm/Makefile
@@ -0,0 +1,105 @@
+# New ports collection makefile for: srilm
+# Date created: Wed Dec 31 16:17:30 CST 2003
+# Whom: Cheng-Lung Sung <clsung@dragon2.net>
+#
+# $FreeBSD$
+#
+
+PORTNAME= srilm
+PORTVERSION= 1.3.3
+CATEGORIES= textproc
+DISTNAME= srilm
+EXTRACT_SUFX?= .gz
+
+MAINTAINER= clsung@dragon2.net
+COMMENT= Toolkit for building and applying statistical language models
+
+LIB_DEPENDS= tcl83.1:${PORTSDIR}/lang/tcl83
+
+DOWNLOAD_URL?= http://www.speech.sri.com/projects/srilm/download.html
+
+USE_GMAKE= yes
+MAKE_ENV+= SRILM=${WRKSRC}
+ALL_TARGET= World
+ONLY_FOR_ARCHS= i386
+USE_REINPLACE= yes
+NO_PACKAGE= "not yet"
+
+PROGRAMS= ngram ngram-count ngram-merge ngram-class disambig anti-ngram \
+ nbest-lattice nbest-mix nbest-optimize nbest-pron-score \
+ segment segment-nbest hidden-ngram multi-ngram \
+ lattice-tool
+SCRIPTS= change-lm-vocab rescore-decipher rescore-acoustic \
+ rescore-reweight rescore-minimize-wer make-batch-counts \
+ merge-batch-counts make-big-lm make-multiword-pfsg \
+ pfsg-from-ngram nbest-error nbest-rover align-with-tags \
+ compute-sclite compare-sclite
+GAWK_SCRIPTS= add-classes-to-pfsg add-dummy-bows add-pauses-to-pfsg \
+ add-ppls bytelog-to-log10 classes-to-fsm \
+ combine-acoustic-scores compare-ppls compute-best-mix \
+ compute-best-rover-mix compute-best-sentence-mix \
+ compute-oov-rate continuous-ngram-count \
+ extract-skip-probs find-reference-posteriors \
+ fsm-to-pfsg get-gt-counts hits-from-log \
+ log10-to-bytelog make-abs-discount make-diacritic-map \
+ make-gt-discounts make-kn-discounts make-kn-counts \
+ make-hiddens-lm make-lm-subset make-nbest-pfsg \
+ make-ngram-pfsg make-sub-lm sort-lm \
+ reverse-lm merge-nbest nbest-posteriors \
+ nbest2-to-nbest1 nbest-vocab pfsg-to-dot pfsg-to-fsm \
+ pfsg-vocab ppl-from-log remove-lowprob-ngrams \
+ replace-words-with-classes reverse-text \
+ reverse-ngram-counts sentid-to-sclite sentid-to-ctm \
+ subtract-ppls uniform-classes vp2text wlat-to-dot \
+ wlat-to-pfsg wlat-stats wordlat-to-lisp prettify
+MAN1= ngram.1 ngram-count.1 ngram-class.1 ngram-merge.1 disambig.1 \
+ nbest-lattice.1 nbest-optimize.1 nbest-mix.1 \
+ nbest-pron-score.1 segment.1 segment-nbest.1 hidden-ngram.1 \
+ anti-ngram.1 multi-ngram.1 lattice-tool.1 training-scripts.1 \
+ lm-scripts.1 ppl-scripts.1 pfsg-scripts.1 nbest-scripts.1
+MAN3= Prob.3 Vocab.3 LM.3 File.3
+MAN5= ngram-format.5 nbest-format.5 classes-format.5 \
+ pfsg-format.5 wlat-format.5
+
+.include <bsd.port.pre.mk>
+
+# This port defines no MASTER_SITES, so the distfile must already be present in
+# ${DISTDIR}.  When it is missing, mark the port IGNOREd; the text completes the
+# framework's "===>  <pkgname> ..." line, so ECHO_MSG needs no printf override.
+.if !exists(${DISTDIR}/${DISTNAME}${EXTRACT_SUFX})
+IGNORE=	requires the source to be fetched manually because of its license. \
+	Please visit ${DOWNLOAD_URL}, follow the download instructions, save \
+	${DISTNAME}${EXTRACT_SUFX} into ${DISTDIR}/ and then run make again
+.endif
+
+# Wipe ${WRKDIR}, create ${WRKSRC}, and unpack the distfile from inside it.
+do-extract:
+	@${RM} -rf ${WRKDIR}
+	@${MKDIR} ${WRKSRC}
+	@( cd ${WRKSRC} && ${EXTRACT_CMD} ${EXTRACT_BEFORE_ARGS} \
+		${_DISTDIR}/${DISTNAME}${EXTRACT_SUFX} ${EXTRACT_AFTER_ARGS} ) \
+		|| exit 1
+
+# Substitute the real work directory for the literal $WRKSRC in the test driver.
+post-patch:
+	@${REINPLACE_CMD} -e 's,$$WRKSRC,${WRKSRC},g' \
+		${WRKSRC}/test/go.run-test
+
+# Install binaries into ${PREFIX}/bin, manual pages into the matching
+# ${MANPREFIX}/man section, and all helper scripts into ${EXAMPLESDIR}.
+do-install:
+	@${MKDIR} ${EXAMPLESDIR}
+	cd ${WRKSRC}/bin/${MACHINE_ARCH} && \
+		${INSTALL_PROGRAM} ${PROGRAMS} ${PREFIX}/bin && \
+		${INSTALL_SCRIPT} ${GAWK_SCRIPTS} ${EXAMPLESDIR}
+	cd ${WRKSRC}/man/man1 && ${INSTALL_MAN} ${MAN1} ${MANPREFIX}/man/man1
+	cd ${WRKSRC}/man/man3 && ${INSTALL_MAN} ${MAN3} ${MANPREFIX}/man/man3
+	cd ${WRKSRC}/man/man5 && ${INSTALL_MAN} ${MAN5} ${MANPREFIX}/man/man5
+	cd ${WRKSRC}/bin && \
+		${INSTALL_SCRIPT} ${SCRIPTS} ${EXAMPLESDIR}
+
+# Print the file named by ${PKGMESSAGE} once the files are in place.
+post-install:
+	@${CAT} ${PKGMESSAGE}
+
+.include <bsd.port.post.mk>
diff --git a/textproc/srilm/distinfo b/textproc/srilm/distinfo
new file mode 100644
index 000000000000..be8a24f2b183
--- /dev/null
+++ b/textproc/srilm/distinfo
@@ -0,0 +1,2 @@
+MD5 (srilm.gz) = fc9ec46ba80466fcb6586899eead4f86
+SIZE (srilm.gz) = 28246362
diff --git a/textproc/srilm/files/patch-Makefile.common.variables b/textproc/srilm/files/patch-Makefile.common.variables
new file mode 100644
index 000000000000..4c90146bc4a5
--- /dev/null
+++ b/textproc/srilm/files/patch-Makefile.common.variables
@@ -0,0 +1,11 @@
+--- common/Makefile.common.variables.orig Sat Feb 22 06:43:31 2003
++++ common/Makefile.common.variables Thu Jan 1 02:57:25 2004
+@@ -52,7 +52,7 @@
+ DEMANGLE_FILTER = 2>&1 | c++filt
+
+ # Path to GNU awk; used in editing scripts
+-GAWK = /usr/local/bin/gawk
++GAWK = /usr/bin/awk
+
+ # Include machine-type dependent variables
+ include $(SRILM)/common/Makefile.machine.$(MACHINE_TYPE)
diff --git a/textproc/srilm/files/patch-Makefile.machine.i386 b/textproc/srilm/files/patch-Makefile.machine.i386
new file mode 100644
index 000000000000..5b30e33973c6
--- /dev/null
+++ b/textproc/srilm/files/patch-Makefile.machine.i386
@@ -0,0 +1,72 @@
+--- common/Makefile.machine.i386.orig Thu Jan 1 12:44:20 2004
++++ common/Makefile.machine.i386 Thu Jan 1 12:38:27 2004
+@@ -0,0 +1,69 @@
++#
++# File: Makefile.machine.i386
++# Author: The SRI DECIPHER (TM) System
++# Date: Fri Feb 19 22:45:31 PST 1999
++#
++# Description:
++# Machine dependent compilation options and variable definitions
++# for FreeBSD/i386 platform (adapted from the Linux/i686 settings)
++#
++# Copyright (c) 1999-2001 SRI International. All Rights Reserved.
++#
++# $Header: /home/srilm/devel/common/RCS/Makefile.machine.i686,v 1.8 2003/02/21 22:30:00 stolcke Exp $
++#
++
++ # Use the GNU C compiler.
++ GCC_FLAGS =
++ CC = gcc $(GCC_FLAGS)
++ CXX = g++ -Wno-deprecated $(GCC_FLAGS) -DINSTANTIATE_TEMPLATES
++
++ # Optional compilation flags.
++ OPTIMIZE_FLAGS = -g -O2
++ DEBUG_FLAGS = -g -DDEBUG
++ PROFILE_FLAGS = -g -pg -O2
++
++ # Optional linking flags.
++ EXPORT_LDFLAGS = -s
++
++ # Shared compilation flags.
++ CFLAGS = $(ADDITIONAL_CFLAGS) $(INCLUDES)
++ CXXFLAGS = $(ADDITIONAL_CXXFLAGS) $(INCLUDES)
++
++ # Shared linking flags.
++ LDFLAGS = $(ADDITIONAL_LDFLAGS) -L$(SRILM_LIBDIR)
++
++ # Other useful compilation flags.
++ ADDITIONAL_CFLAGS =
++ ADDITIONAL_CXXFLAGS =
++
++ # Other useful include directories.
++ ADDITIONAL_INCLUDES =
++
++ # Other useful linking flags.
++ ADDITIONAL_LDFLAGS =
++
++ # Other useful libraries: per-module object directories of this build tree, plus libm.
++ ADDITIONAL_LIBRARIES = -L$(SRILM)/dstruct/obj/$(MACHINE_TYPE) -L$(SRILM)/misc/obj/$(MACHINE_TYPE) -L$(SRILM)/htk/obj/$(MACHINE_TYPE) -L$(SRILM)/lattice/obj/$(MACHINE_TYPE) -L$(SRILM)/utils/obj/$(MACHINE_TYPE) -L$(SRILM)/lm/obj/$(MACHINE_TYPE) -lm
++
++ # run-time linker path flag
++ RLD_FLAG = -R
++
++ # Tcl support (Tcl 8.3 from the lang/tcl83 port, under /usr/local)
++ TCL_INCLUDE = -I/usr/local/include/tcl8.3
++ TCL_LIBRARY = -L/usr/local/lib -ltcl83
++
++ # No ranlib
++ RANLIB = :
++
++ # Generate dependencies from source files.
++ GEN_DEP = $(CC) $(CFLAGS) -MM
++
++ GEN_DEP.cc = $(CXX) $(CXXFLAGS) -MM
++
++ # Run lint.
++ LINT = lint
++ LINT_FLAGS = -DDEBUG $(CFLAGS)
++
++ # Location of awk binary
++ GAWK = /usr/bin/awk
++
diff --git a/textproc/srilm/files/patch-bin-machine-type b/textproc/srilm/files/patch-bin-machine-type
new file mode 100644
index 000000000000..a5d1f37f33b4
--- /dev/null
+++ b/textproc/srilm/files/patch-bin-machine-type
@@ -0,0 +1,11 @@
+--- bin/machine-type.orig Sat Feb 22 06:04:46 2003
++++ bin/machine-type Thu Jan 1 12:43:08 2004
+@@ -92,6 +92,8 @@
+ set MACHINE_TYPE = macosx
+ else if (`uname -m` == i686) then
+ set MACHINE_TYPE = i686
++ else if (`uname -m` == i386) then
++ set MACHINE_TYPE = i386
+ else
+ ## Generate an error by doing nothing. (Used to be the line below:)
+ ## echo "ERROR: Unsupported machine type: "$RESULT
diff --git a/textproc/srilm/files/patch-go.run-test b/textproc/srilm/files/patch-go.run-test
new file mode 100644
index 000000000000..96800a46af32
--- /dev/null
+++ b/textproc/srilm/files/patch-go.run-test
@@ -0,0 +1,12 @@
+--- test/go.run-test.orig Thu Jan 1 12:15:21 2004
++++ test/go.run-test Thu Jan 1 12:16:10 2004
+@@ -6,6 +6,9 @@
+ # $Header: /home/srilm/devel/test/RCS/go.run-test,v 1.10 2003/02/27 23:42:35 stolcke Exp $
+ #
+
++PATH=$PATH:$WRKSRC/bin:$WRKSRC/bin/i386
++export PATH
++
+ dir=$1
+
+ if [ -z "$MACHINE_TYPE" ]; then
diff --git a/textproc/srilm/files/patch-lattice-src-Makefile b/textproc/srilm/files/patch-lattice-src-Makefile
new file mode 100644
index 000000000000..3d538ac7eab6
--- /dev/null
+++ b/textproc/srilm/files/patch-lattice-src-Makefile
@@ -0,0 +1,11 @@
+--- lattice/src/Makefile.orig Sun Aug 25 05:29:45 2002
++++ lattice/src/Makefile Thu Jan 1 11:12:00 2004
+@@ -131,7 +131,7 @@
+ $(ARCHIVE) $@ $^ $(DEMANGLE_FILTER)
+ $(RANLIB) $@ $(DEMANGLE_FILTER)
+
+-$(PROGRAMS): $(LIBRARY) $(OTHER_LIBRARIES)
++$(PROGRAMS): $(LIBRARY)
+
+ # Variables and Targets for released system
+
diff --git a/textproc/srilm/files/patch-lm-src-Makefile b/textproc/srilm/files/patch-lm-src-Makefile
new file mode 100644
index 000000000000..ea48aee14621
--- /dev/null
+++ b/textproc/srilm/files/patch-lm-src-Makefile
@@ -0,0 +1,11 @@
+--- lm/src/Makefile.orig Sat Feb 22 04:20:46 2003
++++ lm/src/Makefile Thu Jan 1 11:11:40 2004
+@@ -234,7 +234,7 @@
+ $(ARCHIVE) $@ $^ $(DEMANGLE_FILTER)
+ $(RANLIB) $@ $(DEMANGLE_FILTER)
+
+-$(PROGRAMS): $(LIBRARY) $(OTHER_LIBRARIES)
++$(PROGRAMS): $(LIBRARY)
+
+ # Variables and Targets for released system
+
diff --git a/textproc/srilm/pkg-descr b/textproc/srilm/pkg-descr
new file mode 100644
index 000000000000..796b183eee21
--- /dev/null
+++ b/textproc/srilm/pkg-descr
@@ -0,0 +1,19 @@
+SRILM is a toolkit for building and applying statistical language models (LMs),
+primarily for use in speech recognition, statistical tagging and segmentation.
+It has been under development in the SRI Speech Technology and
+Research Laboratory since 1995. The toolkit has also greatly benefitted from
+its use and enhancements during the Johns Hopkins University/CLSP summer
+workshops in 1995, 1996, and 1997.
+
+SRILM consists of the following components:
+
+ * A set of C++ class libraries implementing language models,
+ supporting data structures and miscellaneous utility functions.
+ * A set of executable programs built on top of these libraries to
+ perform standard tasks such as training LMs and testing them on
+ data, tagging or segmenting text, etc.
+ * A collection of miscellaneous scripts facilitating minor related tasks.
+
+WWW: http://www.speech.sri.com/projects/srilm/
+
+Author: stolcke@speech.sri.com
diff --git a/textproc/srilm/pkg-message b/textproc/srilm/pkg-message
new file mode 100644
index 000000000000..bb224821d79f
--- /dev/null
+++ b/textproc/srilm/pkg-message
@@ -0,0 +1,8 @@
+*******************************Reference*****************************
+
+Published research using SRILM may cite the following paper:
+
+PostScript: http://www.speech.sri.com/papers/icslp2002-srilm.ps.gz
+PDF: http://www.speech.sri.com/cgi-bin/run-distill?papers/icslp2002-srilm.ps.gz
+
+*********************************************************************
diff --git a/textproc/srilm/pkg-plist b/textproc/srilm/pkg-plist
new file mode 100644
index 000000000000..dc8a39a122bb
--- /dev/null
+++ b/textproc/srilm/pkg-plist
@@ -0,0 +1,84 @@
+bin/ngram
+bin/ngram-count
+bin/ngram-merge
+bin/ngram-class
+bin/disambig
+bin/anti-ngram
+bin/nbest-lattice
+bin/nbest-mix
+bin/nbest-optimize
+bin/nbest-pron-score
+bin/segment
+bin/segment-nbest
+bin/hidden-ngram
+bin/multi-ngram
+bin/lattice-tool
+%%EXAMPLESDIR%%/change-lm-vocab
+%%EXAMPLESDIR%%/rescore-decipher
+%%EXAMPLESDIR%%/rescore-acoustic
+%%EXAMPLESDIR%%/rescore-reweight
+%%EXAMPLESDIR%%/rescore-minimize-wer
+%%EXAMPLESDIR%%/make-batch-counts
+%%EXAMPLESDIR%%/merge-batch-counts
+%%EXAMPLESDIR%%/make-big-lm
+%%EXAMPLESDIR%%/make-multiword-pfsg
+%%EXAMPLESDIR%%/pfsg-from-ngram
+%%EXAMPLESDIR%%/nbest-error
+%%EXAMPLESDIR%%/nbest-rover
+%%EXAMPLESDIR%%/align-with-tags
+%%EXAMPLESDIR%%/compute-sclite
+%%EXAMPLESDIR%%/compare-sclite
+%%EXAMPLESDIR%%/add-classes-to-pfsg
+%%EXAMPLESDIR%%/add-dummy-bows
+%%EXAMPLESDIR%%/add-pauses-to-pfsg
+%%EXAMPLESDIR%%/add-ppls
+%%EXAMPLESDIR%%/bytelog-to-log10
+%%EXAMPLESDIR%%/classes-to-fsm
+%%EXAMPLESDIR%%/combine-acoustic-scores
+%%EXAMPLESDIR%%/compare-ppls
+%%EXAMPLESDIR%%/compute-best-mix
+%%EXAMPLESDIR%%/compute-best-rover-mix
+%%EXAMPLESDIR%%/compute-best-sentence-mix
+%%EXAMPLESDIR%%/compute-oov-rate
+%%EXAMPLESDIR%%/continuous-ngram-count
+%%EXAMPLESDIR%%/extract-skip-probs
+%%EXAMPLESDIR%%/find-reference-posteriors
+%%EXAMPLESDIR%%/fsm-to-pfsg
+%%EXAMPLESDIR%%/get-gt-counts
+%%EXAMPLESDIR%%/hits-from-log
+%%EXAMPLESDIR%%/log10-to-bytelog
+%%EXAMPLESDIR%%/make-abs-discount
+%%EXAMPLESDIR%%/make-diacritic-map
+%%EXAMPLESDIR%%/make-gt-discounts
+%%EXAMPLESDIR%%/make-kn-discounts
+%%EXAMPLESDIR%%/make-kn-counts
+%%EXAMPLESDIR%%/make-hiddens-lm
+%%EXAMPLESDIR%%/make-lm-subset
+%%EXAMPLESDIR%%/make-nbest-pfsg
+%%EXAMPLESDIR%%/make-ngram-pfsg
+%%EXAMPLESDIR%%/make-sub-lm
+%%EXAMPLESDIR%%/sort-lm
+%%EXAMPLESDIR%%/reverse-lm
+%%EXAMPLESDIR%%/merge-nbest
+%%EXAMPLESDIR%%/nbest-posteriors
+%%EXAMPLESDIR%%/nbest2-to-nbest1
+%%EXAMPLESDIR%%/nbest-vocab
+%%EXAMPLESDIR%%/pfsg-to-dot
+%%EXAMPLESDIR%%/pfsg-to-fsm
+%%EXAMPLESDIR%%/pfsg-vocab
+%%EXAMPLESDIR%%/ppl-from-log
+%%EXAMPLESDIR%%/remove-lowprob-ngrams
+%%EXAMPLESDIR%%/replace-words-with-classes
+%%EXAMPLESDIR%%/reverse-text
+%%EXAMPLESDIR%%/reverse-ngram-counts
+%%EXAMPLESDIR%%/sentid-to-sclite
+%%EXAMPLESDIR%%/sentid-to-ctm
+%%EXAMPLESDIR%%/subtract-ppls
+%%EXAMPLESDIR%%/uniform-classes
+%%EXAMPLESDIR%%/vp2text
+%%EXAMPLESDIR%%/wlat-to-dot
+%%EXAMPLESDIR%%/wlat-to-pfsg
+%%EXAMPLESDIR%%/wlat-stats
+%%EXAMPLESDIR%%/wordlat-to-lisp
+%%EXAMPLESDIR%%/prettify
+@dirrm %%EXAMPLESDIR%%