[Xfce4-commits] <postler:master> Use libstemmer to search with different spelling
Christian Dywan
noreply at xfce.org
Thu Aug 11 00:38:01 CEST 2011
Updating branch refs/heads/master
to d3b910b0347478fcacd62fd62fb0296e3829ddc1 (commit)
from fe15ce83a29d2b2dd3908f43c880a536485ed934 (commit)
commit d3b910b0347478fcacd62fd62fb0296e3829ddc1
Author: Christian Dywan <christian at twotoasts.de>
Date: Thu Aug 11 00:09:41 2011 +0200
Use libstemmer to search with different spelling
README | 2 +-
postler/postler-index.vala | 32 ++++++++++++++++++++++++++++----
postler/postler.vapi | 11 +++++++++++
postler/wscript_build | 2 +-
wscript | 5 +++++
5 files changed, 46 insertions(+), 6 deletions(-)
diff --git a/README b/README
index 96b34ee..303fa83 100644
--- a/README
+++ b/README
@@ -8,7 +8,7 @@ by default, offline access is integral. Contact completion and address book
integration are provided by Dexter.
Requirements: GIO 2.26, GTK+ 2.18, WebkitGTK+ 1.1.18, Unique 0.9, libnotify,
- libcanberra, (Berkeley) db, openssl
+ libcanberra, (Berkeley) db, openssl, libstemmer
Recommended: libfolks, Zeitgeist, libindicate, Dexter, lynx
diff --git a/postler/postler-index.vala b/postler/postler-index.vala
index 135f0ac..4ea4d92 100644
--- a/postler/postler-index.vala
+++ b/postler/postler-index.vala
@@ -280,15 +280,19 @@ namespace Postler {
var cond = new StringBuilder ("(");
foreach (string a_word in parts[1].split (" ")) {
/* A - (hyphen) negates search results */
- string word, negate;
+ string word;
+ string negate = "";
+ bool literal = false;
if (a_word[0] == '-') {
word = a_word.substring (1, -1);
negate = "NOT";
}
- else {
- word = a_word;
- negate = "";
+ else if (a_word[0] == '+') {
+ word = a_word.substring (1, -1);
+ literal = true;
}
+ else
+ word = a_word;
/* Interpret a month as "only messages in that month" */
string? month = dates.lookup (word.down ());
@@ -298,6 +302,26 @@ namespace Postler {
negate, month);
continue;
}
+
+ if (!literal) {
+ string? stemmed = null;
+ foreach (var lang in Intl.get_language_names ()) {
+ var stemmer = new Stemmer.Stemmer (lang.split ("-", 2)[0]);
+ if (stemmer != null) {
+ /* Get the word stem.
+ Skip to the next language if the stem equals the word.
+ Skip stems that are not contained in the word itself. */
+ string stem = stemmer.stem (word, (int)word.length);
+ if (stem != word && stem in word) {
+ stemmed = stem;
+ break;
+ }
+ }
+ }
+ if (stemmed != null)
+ word = stemmed;
+ }
+
/* * is a wildcard, ' must be escaped by doubling it */
string escaped = word.replace ("*", "%").replace ("\'", "\'\'");
/* any is an alias for subject, sender, attachment, excerpt */
diff --git a/postler/postler.vapi b/postler/postler.vapi
index e3f3b5f..13728c0 100644
--- a/postler/postler.vapi
+++ b/postler/postler.vapi
@@ -82,3 +82,14 @@ namespace Soup {
[CCode (cname = "soup_date_to_timeval", cheader_filename = "libsoup/soup.h")]
public void date_to_timeval (Soup.Date date, GLib.TimeVal time);
}
+
+[CCode (cprefix = "sb_", lower_case_cprefix = "sb_")]
+namespace Stemmer {
+ [Compact]
+ [CCode (cname = "struct sb_stemmer", free_function = "sb_stemmer_delete", cheader_filename = "libstemmer.h")]
+ public class Stemmer {
+ public Stemmer (string algorithm, string? charenc=null);
+ public unowned string stem (string word, int size);
+ public int length ();
+ }
+}
diff --git a/postler/wscript_build b/postler/wscript_build
index b35b463..65e26be 100644
--- a/postler/wscript_build
+++ b/postler/wscript_build
@@ -14,7 +14,7 @@ obj.target = 'postler'
obj.includes = '. ..'
obj.find_sources_in_dirs ('.')
obj.uselib = 'GIO GTHREAD GTK WEBKIT SQLITE3 LIBNOTIFY LIBCANBERRA \
- UNIQUE INDICATE ZEITGEIST FOLKS GEE'
+ UNIQUE INDICATE ZEITGEIST FOLKS GEE SB_STEMMER_STEM'
obj.packages = 'config postler posix gio-2.0 libnotify libcanberra sqlite3'
obj.vapi_dirs = '.'
diff --git a/wscript b/wscript
index 745daff..853d6a2 100644
--- a/wscript
+++ b/wscript
@@ -172,6 +172,11 @@ def configure (conf):
else:
Utils.pprint ('YELLOW', 'Building without libindicate.')
+ check_function ('sb_stemmer_stem', 'libstemmer.h', 'stemmer')
+ if not conf.env['HAVE_SB_STEMMER_STEM']:
+ Utils.pprint ('RED', 'libstemmer was not found.')
+ sys.exit (1)
+
# isync
conf.check (header_name='sys/filio.h')
conf.check (fragment='#define _GNU_SOURCE\n#include <stdio.h>\n' \
More information about the Xfce4-commits
mailing list