[Xfce4-commits] <midori:master> Implement katze_utf8_stristr, katze_collfold and use in completion

Christian Dywan noreply at xfce.org
Tue Dec 29 00:00:01 CET 2009


Updating branch refs/heads/master
         to b1ee80d55ed65bff17566f9f2b2bafbc76550550 (commit)
       from 613f47627e79e9c691d8aff009b710cf041d8e63 (commit)

commit b1ee80d55ed65bff17566f9f2b2bafbc76550550
Author: Christian Dywan <christian at twotoasts.de>
Date:   Mon Dec 28 23:44:16 2009 +0100

    Implement katze_utf8_stristr, katze_collfold and use in completion
    
    Evidently normalizing any whole string is too slow for completion,
    so we need to even out case and composition while iterating through
    the strings.
    
    The decomposing version of katze_utf8_stristr is disabled, since
    it is too slow, and an ASCII-only version is used, for now.
    
    A unit test 'compare' is added that solely measures performance of
    katze_collfold and katze_utf8_stristr.

 katze/katze-utils.c            |   93 ++++++++++++++++++++++++++++++++++++++++
 katze/katze-utils.h            |    7 +++
 midori/midori-locationaction.c |   40 +----------------
 tests/completion.c             |   33 ++++++++++++++
 4 files changed, 135 insertions(+), 38 deletions(-)

diff --git a/katze/katze-utils.c b/katze/katze-utils.c
index 60329eb..86a7f90 100644
--- a/katze/katze-utils.c
+++ b/katze/katze-utils.c
@@ -1482,3 +1482,96 @@ katze_load_cached_icon (const gchar* uri,
     return icon || !widget ? icon : gtk_widget_render_icon (widget,
         GTK_STOCK_FILE, GTK_ICON_SIZE_MENU, NULL);
 }
+
+/**
+ * katze_collfold:
+ * @str: a non-NULL UTF-8 string
+ *
+ * Computes a lower-cased, canonically decomposed copy of @str
+ * that can be used for comparison.
+ *
+ * Return value: a newly allocated string, free with g_free()
+ *
+ * Since: 0.2.3
+ **/
+gchar*
+katze_collfold (const gchar* str)
+{
+    GString* result = g_string_new (NULL);
+    const gchar* p = str;
+
+    while (*p)
+    {
+        gunichar ch = g_unichar_tolower (g_utf8_get_char (p));
+        gsize len;
+        gunichar* sch = g_unicode_canonical_decomposition (ch, &len);
+        gsize i;
+        for (i = 0; i < len; i++)
+            g_string_append_unichar (result, sch[i]);
+        g_free (sch); /* the decomposition is newly allocated per character */
+        p = g_utf8_next_char (p);
+    }
+
+    return g_string_free (result, FALSE);
+}
+
+/**
+ * katze_utf8_stristr:
+ * @haystack: a non-NULL UTF-8 string
+ * @needle: a normalized non-NULL UTF-8 string
+ *
+ * Determines whether @needle is in @haystack, disregarding
+ * differences in ASCII case. An empty @needle always matches.
+ *
+ * Return value: %TRUE if @needle is found in @haystack
+ *
+ * Since: 0.2.3
+ **/
+gboolean
+katze_utf8_stristr (const gchar* haystack,
+                    const gchar* needle)
+{
+    #if 0 /* 0,000159 seconds */
+    /* Too slow for use in completion */
+    gchar* nhaystack = g_utf8_normalize (haystack, -1, G_NORMALIZE_DEFAULT);
+    const gchar *p = nhaystack;
+    gsize len = strlen (needle);
+    gsize i;
+
+    while (*p)
+    {
+        for (i = 0; i < len; i++)
+            if (g_unichar_tolower (g_utf8_get_char (p + i))
+             != g_unichar_tolower (g_utf8_get_char (needle + i)))
+                goto next;
+
+        g_free (nhaystack);
+        return TRUE;
+
+        next:
+            p = g_utf8_next_char (p);
+    }
+
+    g_free (nhaystack);
+    return FALSE;
+    #else /* 0,000044 seconds */
+    /* No unicode matching, only ASCII letters are case folded */
+    const gchar *p;
+    gsize len = strlen (needle);
+    gsize i;
+
+    /* Also try the terminating NUL position, so that an empty
+       needle is found in an empty haystack, just like strstr */
+    for (p = haystack; ; p++)
+    {
+        for (i = 0; i < len; i++)
+            if (g_ascii_tolower (p[i]) != g_ascii_tolower (needle[i]))
+                break;
+        if (i == len)
+            return TRUE;
+        /* Stop at the terminator; p[i] above never reads past it */
+        if (!*p)
+            return FALSE;
+    }
+    #endif
+}
diff --git a/katze/katze-utils.h b/katze/katze-utils.h
index c8648ff..6a5843f 100644
--- a/katze/katze-utils.h
+++ b/katze/katze-utils.h
@@ -151,6 +151,13 @@ GdkPixbuf*
 katze_load_cached_icon               (const gchar*    uri,
                                       GtkWidget*      widget);
 
+gchar*
+katze_collfold                       (const gchar*    str);
+
+gboolean
+katze_utf8_stristr                   (const gchar*    haystack,
+                                      const gchar*    needle);
+
 G_END_DECLS
 
 #endif /* __KATZE_UTILS_H__ */
diff --git a/midori/midori-locationaction.c b/midori/midori-locationaction.c
index f05a9d1..4939b56 100644
--- a/midori/midori-locationaction.c
+++ b/midori/midori-locationaction.c
@@ -755,47 +755,11 @@ midori_location_entry_completion_match_cb (GtkEntryCompletion* completion,
     match = FALSE;
     if (G_LIKELY (uri))
     {
-        gchar* nkey;
-        gchar* fkey;
-        gchar* nuri;
-        gchar* furi;
-
-        if ((nkey = g_utf8_normalize (key, -1, G_NORMALIZE_ALL)))
-        {
-            fkey = g_utf8_casefold (nkey, -1);
-            g_free (nkey);
-        }
-        else
-            fkey = g_utf8_casefold (key, -1);
-        if ((nuri = g_utf8_normalize (uri, -1, G_NORMALIZE_ALL)))
-        {
-            furi = g_utf8_casefold (nuri, -1);
-            g_free (nuri);
-        }
-        else
-            furi = g_utf8_casefold (uri, -1);
+        match = katze_utf8_stristr (uri, key);
         g_free (uri);
-        match = strstr (furi, fkey) != NULL;
-        g_free (furi);
 
         if (!match && G_LIKELY (title))
-        {
-            gchar* ntitle;
-            gchar* ftitle;
-
-            if ((ntitle = g_utf8_normalize (title, -1, G_NORMALIZE_ALL)))
-            {
-                ftitle = g_utf8_casefold (ntitle, -1);
-                g_free (ntitle);
-            }
-            else
-                ftitle = g_utf8_casefold (title, -1);
-
-            match = strstr (ftitle, fkey) != NULL;
-            g_free (ftitle);
-        }
-
-        g_free (fkey);
+            match = katze_utf8_stristr (title, key);
     }
 
     g_free (title);
diff --git a/tests/completion.c b/tests/completion.c
index ff802bc..26614db 100644
--- a/tests/completion.c
+++ b/tests/completion.c
@@ -19,6 +19,38 @@
 GtkWidget*
 midori_location_action_entry_for_proxy (GtkWidget* proxy);
 
+static const gchar* compare_urls[] = {
+ "http://en.wikipedia.org/wiki/Foul",
+ "http://de.wikipedia.org/wiki/Düsseldorf",
+ "http://de.wikipedia.org/wiki/Düsseldorf",
+ "http://ja.wikipedia.org/wiki/若井はんじ・けんじ",
+ "http://www.johannkönig.com",
+ "http://şøñđëřżēıċħęŋđőmæîņĭśŧşũþėŗ.de",
+ };
+
+/* Benchmarks katze_collfold and katze_utf8_stristr; prints the mean run time */
+static void
+completion_compare (void)
+{
+    const guint runs = 10000;
+    guint t, i, j;
+    gdouble elapsed = 0.0;
+
+    for (t = 0; t < runs; t++)
+    {
+        g_test_timer_start ();
+        for (i = 0; i < G_N_ELEMENTS (compare_urls); i++)
+        {
+            gchar* url = katze_collfold (compare_urls[i]);
+            for (j = 0; j < G_N_ELEMENTS (compare_urls); j++)
+                katze_utf8_stristr (compare_urls[j], url); /* every URL */
+            g_free (url);
+        }
+        elapsed += g_test_timer_elapsed ();
+    }
+    g_print ("%f seconds for comparison\n", elapsed / runs);
+}
+
 typedef struct
 {
     const gchar* uri;
@@ -287,6 +319,7 @@ main (int    argc,
     g_test_init (&argc, &argv, NULL);
     gtk_init_check (&argc, &argv);
 
+    g_test_add_func ("/completion/compare", completion_compare);
     g_test_add_func ("/completion/count", completion_count);
     g_test_add_func ("/completion/fill", completion_fill);
     g_test_add_func ("/completion/match", completion_match);



More information about the Xfce4-commits mailing list