diff options
author | Thierry Thomas <thierry@FreeBSD.org> | 2007-08-23 22:13:35 +0000 |
---|---|---|
committer | Thierry Thomas <thierry@FreeBSD.org> | 2007-08-23 22:13:35 +0000 |
commit | 3a00ffa77e26933c3406b9a99a9347e47afa47ca (patch) | |
tree | 4d4227847615ed17c93dfea8ccfaee024231eafa /textproc/libtextcat/files/patch-src_constants.h | |
parent | - Update to 20070822 (diff) |
Import patches (imported from OpenOffice.org 2.3 by Fedora, at least in
part). These patches, released under a BSD license, seem to improve the
accuracy of language detection, especially for languages that don't use a
Latin script.
Notes:
svn path=/head/; revision=198222
Diffstat (limited to 'textproc/libtextcat/files/patch-src_constants.h')
-rw-r--r-- | textproc/libtextcat/files/patch-src_constants.h | 45 |
1 files changed, 45 insertions, 0 deletions
diff --git a/textproc/libtextcat/files/patch-src_constants.h b/textproc/libtextcat/files/patch-src_constants.h new file mode 100644 index 000000000000..4e4f5d2d02aa --- /dev/null +++ b/textproc/libtextcat/files/patch-src_constants.h @@ -0,0 +1,45 @@ +--- src/constants.h.orig Thu May 22 13:32:43 2003 ++++ src/constants.h Thu Aug 23 22:47:07 2007 +@@ -39,6 +39,8 @@ + */ + #include <limits.h> + ++#define _UTF8_ ++ + #define DESCRIPTION "out of place" + + /* Reported matches are those fingerprints with a score less than best +@@ -59,14 +61,21 @@ + /* Maximum number of n-grams in a fingerprint */ + #define MAXNGRAMS 400 + +-/* Maximum size of an n-gram? */ +-#define MAXNGRAMSIZE 5 ++/* Maximum number of character of an n-gram? */ ++#define MAXNGRAMSYMBOL 5 ++ ++/* Maximum size of the string representing an n-gram (must be greater than number of symbol) */ ++#ifdef _UTF8_ ++#define MAXNGRAMSIZE 20 ++#else ++#define MAXNGRAMSIZE MAXNGRAMSYMBOL ++#endif + + /* Which characters are not acceptable in n-grams? */ + #define INVALID(c) (isspace((int)c) || isdigit((int)c)) + + /* Minimum size (in characters) for accepting a document */ +-#define MINDOCSIZE 25 ++#define MINDOCSIZE 6 + + /* Maximum penalty for missing an n-gram in fingerprint */ + #define MAXOUTOFPLACE 400 +@@ -75,5 +84,8 @@ + #define TABLEPOW 13 + + #define MAXSCORE INT_MAX ++ ++/* where the fingerprints files are stored */ ++#define DEFAULT_FINGERPRINTS_PATH "" + + #endif |