summaryrefslogtreecommitdiff
path: root/textproc/libtextcat/files/patch-src_constants.h
diff options
context:
space:
mode:
authorThierry Thomas <thierry@FreeBSD.org>2007-08-23 22:13:35 +0000
committerThierry Thomas <thierry@FreeBSD.org>2007-08-23 22:13:35 +0000
commit3a00ffa77e26933c3406b9a99a9347e47afa47ca (patch)
tree4d4227847615ed17c93dfea8ccfaee024231eafa /textproc/libtextcat/files/patch-src_constants.h
parent- Update to 20070822 (diff)
Import patches (imported from OpenOffice.org 2.3 by Fedora, at least in
part). These patches, released under a BSD license, seem to improve the accuracy of language detection, especially for languages that don't use a Latin script.
Notes
Notes: svn path=/head/; revision=198222
Diffstat (limited to 'textproc/libtextcat/files/patch-src_constants.h')
-rw-r--r--textproc/libtextcat/files/patch-src_constants.h45
1 files changed, 45 insertions, 0 deletions
diff --git a/textproc/libtextcat/files/patch-src_constants.h b/textproc/libtextcat/files/patch-src_constants.h
new file mode 100644
index 000000000000..4e4f5d2d02aa
--- /dev/null
+++ b/textproc/libtextcat/files/patch-src_constants.h
@@ -0,0 +1,45 @@
+--- src/constants.h.orig Thu May 22 13:32:43 2003
++++ src/constants.h Thu Aug 23 22:47:07 2007
+@@ -39,6 +39,8 @@
+ */
+ #include <limits.h>
+
++#define _UTF8_
++
+ #define DESCRIPTION "out of place"
+
+ /* Reported matches are those fingerprints with a score less than best
+@@ -59,14 +61,21 @@
+ /* Maximum number of n-grams in a fingerprint */
+ #define MAXNGRAMS 400
+
+-/* Maximum size of an n-gram? */
+-#define MAXNGRAMSIZE 5
++/* Maximum number of characters in an n-gram? */
++#define MAXNGRAMSYMBOL 5
++
++/* Maximum size of the string representing an n-gram (must be at least the number of symbols) */
++#ifdef _UTF8_
++#define MAXNGRAMSIZE 20
++#else
++#define MAXNGRAMSIZE MAXNGRAMSYMBOL
++#endif
+
+ /* Which characters are not acceptable in n-grams? */
+ #define INVALID(c) (isspace((int)c) || isdigit((int)c))
+
+ /* Minimum size (in characters) for accepting a document */
+-#define MINDOCSIZE 25
++#define MINDOCSIZE 6
+
+ /* Maximum penalty for missing an n-gram in fingerprint */
+ #define MAXOUTOFPLACE 400
+@@ -75,5 +84,8 @@
+ #define TABLEPOW 13
+
+ #define MAXSCORE INT_MAX
++
++/* where the fingerprints files are stored */
++#define DEFAULT_FINGERPRINTS_PATH ""
+
+ #endif