From 3a00ffa77e26933c3406b9a99a9347e47afa47ca Mon Sep 17 00:00:00 2001 From: Thierry Thomas Date: Thu, 23 Aug 2007 22:13:35 +0000 Subject: Import patches (imported from OpenOffice.org 2.3 by Fedora, at least in part). These patches, released under a BSD license, seem to improve the accuracy of language detection, especially for languages that don't use a Latin script. --- textproc/libtextcat/files/patch-src_constants.h | 45 +++++++++++++++++++++++++ 1 file changed, 45 insertions(+) create mode 100644 textproc/libtextcat/files/patch-src_constants.h (limited to 'textproc/libtextcat/files/patch-src_constants.h') diff --git a/textproc/libtextcat/files/patch-src_constants.h b/textproc/libtextcat/files/patch-src_constants.h new file mode 100644 index 000000000000..4e4f5d2d02aa --- /dev/null +++ b/textproc/libtextcat/files/patch-src_constants.h @@ -0,0 +1,45 @@ +--- src/constants.h.orig Thu May 22 13:32:43 2003 ++++ src/constants.h Thu Aug 23 22:47:07 2007 +@@ -39,6 +39,8 @@ + */ + #include <limits.h> + ++#define _UTF8_ ++ + #define DESCRIPTION "out of place" + + /* Reported matches are those fingerprints with a score less than best +@@ -59,14 +61,21 @@ + /* Maximum number of n-grams in a fingerprint */ + #define MAXNGRAMS 400 + +-/* Maximum size of an n-gram? */ +-#define MAXNGRAMSIZE 5 ++/* Maximum number of character of an n-gram? */ ++#define MAXNGRAMSYMBOL 5 ++ ++/* Maximum size of the string representing an n-gram (must be greater than number of symbol) */ ++#ifdef _UTF8_ ++#define MAXNGRAMSIZE 20 ++#else ++#define MAXNGRAMSIZE MAXNGRAMSYMBOL ++#endif + + /* Which characters are not acceptable in n-grams? 
*/ + #define INVALID(c) (isspace((int)c) || isdigit((int)c)) + + /* Minimum size (in characters) for accepting a document */ +-#define MINDOCSIZE 25 ++#define MINDOCSIZE 6 + + /* Maximum penalty for missing an n-gram in fingerprint */ + #define MAXOUTOFPLACE 400 +@@ -75,5 +84,8 @@ + #define TABLEPOW 13 + + #define MAXSCORE INT_MAX ++ ++/* where the fingerprints files are stored */ ++#define DEFAULT_FINGERPRINTS_PATH "" + + #endif -- cgit v1.2.3