summary refs log tree commit diff
path: root/textproc/py-tokenizer
diff options
context:
space:
mode:
Diffstat (limited to 'textproc/py-tokenizer')
-rw-r--r-- textproc/py-tokenizer/Makefile 22
-rw-r--r-- textproc/py-tokenizer/distinfo 3
-rw-r--r-- textproc/py-tokenizer/pkg-descr 5
3 files changed, 30 insertions, 0 deletions
diff --git a/textproc/py-tokenizer/Makefile b/textproc/py-tokenizer/Makefile
new file mode 100644
index 000000000000..b4ad88c9c8d9
--- /dev/null
+++ b/textproc/py-tokenizer/Makefile
@@ -0,0 +1,22 @@
+PORTNAME= tokenizer
+PORTVERSION= 3.5.0
+PORTREVISION= 1
+CATEGORIES= textproc python
+MASTER_SITES= PYPI
+PKGNAMEPREFIX= ${PYTHON_PKGNAMEPREFIX}
+
+MAINTAINER= otis@FreeBSD.org
+COMMENT= Tokenizer for Icelandic text
+WWW= https://github.com/mideind/Tokenizer
+
+LICENSE= MIT
+
+BUILD_DEPENDS= ${PY_SETUPTOOLS} \
+ ${PYTHON_PKGNAMEPREFIX}wheel>0:devel/py-wheel@${PY_FLAVOR}
+
+USES= python
+USE_PYTHON= autoplist concurrent pep517
+
+NO_ARCH= yes
+
+.include <bsd.port.mk>
diff --git a/textproc/py-tokenizer/distinfo b/textproc/py-tokenizer/distinfo
new file mode 100644
index 000000000000..5002d345dfbd
--- /dev/null
+++ b/textproc/py-tokenizer/distinfo
@@ -0,0 +1,3 @@
+TIMESTAMP = 1756293103
+SHA256 (tokenizer-3.5.0.tar.gz) = f26694d6be85815d23db167ee9b1c9e2ed7b672cfc8b9baa607ba3aba6070727
+SIZE (tokenizer-3.5.0.tar.gz) = 127323
diff --git a/textproc/py-tokenizer/pkg-descr b/textproc/py-tokenizer/pkg-descr
new file mode 100644
index 000000000000..c1f700edffe5
--- /dev/null
+++ b/textproc/py-tokenizer/pkg-descr
@@ -0,0 +1,5 @@
+Tokenizer: A tokenizer for Icelandic text
+
+Tokenization is a necessary first step in many natural language processing
+tasks, such as word counting, parsing, spell checking, corpus generation, and
+statistical analysis of text.