[Xfce4-commits] <postler:master> Use libstemmer to search with different spelling

Christian Dywan noreply at xfce.org
Thu Aug 11 00:38:01 CEST 2011


Updating branch refs/heads/master
         to d3b910b0347478fcacd62fd62fb0296e3829ddc1 (commit)
       from fe15ce83a29d2b2dd3908f43c880a536485ed934 (commit)

commit d3b910b0347478fcacd62fd62fb0296e3829ddc1
Author: Christian Dywan <christian at twotoasts.de>
Date:   Thu Aug 11 00:09:41 2011 +0200

    Use libstemmer to search with different spelling

 README                     |    2 +-
 postler/postler-index.vala |   32 ++++++++++++++++++++++++++++----
 postler/postler.vapi       |   11 +++++++++++
 postler/wscript_build      |    2 +-
 wscript                    |    5 +++++
 5 files changed, 46 insertions(+), 6 deletions(-)

diff --git a/README b/README
index 96b34ee..303fa83 100644
--- a/README
+++ b/README
@@ -8,7 +8,7 @@ by default, offline access is integral. Contact completion and address book
 integration are provided by Dexter.
 
 Requirements: GIO 2.26, GTK+ 2.18, WebkitGTK+ 1.1.18, Unique 0.9, libnotify,
-              libcanberra, (Berkeley) db, openssl
+              libcanberra, (Berkeley) db, openssl, libstemmer
 
 Recommended: libfolks, Zeitgeist, libindicate, Dexter, lynx
 
diff --git a/postler/postler-index.vala b/postler/postler-index.vala
index 135f0ac..4ea4d92 100644
--- a/postler/postler-index.vala
+++ b/postler/postler-index.vala
@@ -280,15 +280,19 @@ namespace Postler {
                     var cond = new StringBuilder ("(");
                     foreach (string a_word in parts[1].split (" ")) {
                         /* A - (hyphen) negates search results */
-                        string word, negate;
+                        string word;
+                        string negate = "";
+                        bool literal = false;
                         if (a_word[0] == '-') {
                             word = a_word.substring (1, -1);
                             negate = "NOT";
                         }
-                        else {
-                            word = a_word;
-                            negate = "";
+                        else if (a_word[0] == '+') {
+                            word = a_word.substring (1, -1);
+                            literal = true;
                         }
+                        else
+                            word = a_word;
 
                         /* Interpret a month as "only messages in that month" */
                         string? month = dates.lookup (word.down ());
@@ -298,6 +302,26 @@ namespace Postler {
                                 negate, month);
                             continue;
                         }
+
+                        if (!literal) {
+                            string? stemmed = null;
+                            foreach (var lang in Intl.get_language_names ()) {
+                                var stemmer = new Stemmer.Stemmer (lang.split ("-", 2)[0]);
+                                if (stemmer != null) {
+                                    /* Get the word stem.
+                                       Skip to next language if word equals stem.
+                                       Skip stems not containing word itself. */
+                                    string stem = stemmer.stem (word, (int)word.length);
+                                    if (stem != word && stem in word) {
+                                        stemmed = stem;
+                                        break;
+                                    }
+                                }
+                            }
+                            if (stemmed != null)
+                                word = stemmed;
+                        }
+
                         /* * is a wildcard, ' must be escaped by doubling it */
                         string escaped = word.replace ("*", "%").replace ("\'", "\'\'");
                         /* any is an alias for subject, sender, attachment, excerpt */
diff --git a/postler/postler.vapi b/postler/postler.vapi
index e3f3b5f..13728c0 100644
--- a/postler/postler.vapi
+++ b/postler/postler.vapi
@@ -82,3 +82,14 @@ namespace Soup {
     [CCode (cname = "soup_date_to_timeval", cheader_filename = "libsoup/soup.h")]
     public void date_to_timeval (Soup.Date date, GLib.TimeVal time);
 }
+
+[CCode (cprefix = "sb_", lower_case_cprefix = "sb_")]
+namespace Stemmer {
+    [Compact]
+    [CCode (cname = "struct sb_stemmer", free_function = "sb_stemmer_delete", cheader_filename = "libstemmer.h")]
+    public class Stemmer {
+        public Stemmer (string algorithm, string? charenc=null);
+        public unowned string stem (string word, int size);
+        public int length ();
+    }
+}
diff --git a/postler/wscript_build b/postler/wscript_build
index b35b463..65e26be 100644
--- a/postler/wscript_build
+++ b/postler/wscript_build
@@ -14,7 +14,7 @@ obj.target = 'postler'
 obj.includes = '. ..'
 obj.find_sources_in_dirs ('.')
 obj.uselib = 'GIO GTHREAD GTK WEBKIT SQLITE3 LIBNOTIFY LIBCANBERRA  \
-              UNIQUE INDICATE ZEITGEIST FOLKS GEE'
+              UNIQUE INDICATE ZEITGEIST FOLKS GEE SB_STEMMER_STEM'
 obj.packages = 'config postler posix gio-2.0 libnotify libcanberra sqlite3'
 obj.vapi_dirs = '.'
 
diff --git a/wscript b/wscript
index 745daff..853d6a2 100644
--- a/wscript
+++ b/wscript
@@ -172,6 +172,11 @@ def configure (conf):
     else:
         Utils.pprint ('YELLOW', 'Building without libindicate.')
 
+    check_function ('sb_stemmer_stem', 'libstemmer.h', 'stemmer')
+    if not conf.env['HAVE_SB_STEMMER_STEM']:
+        Utils.pprint ('RED', 'libstemmer was not found.')
+        sys.exit (1)
+
     # isync
     conf.check (header_name='sys/filio.h')
     conf.check (fragment='#define _GNU_SOURCE\n#include <stdio.h>\n' \


More information about the Xfce4-commits mailing list