[Xfce4-commits] <squeeze:master> Support multi line parsing with pcre.

Peter de Ridder noreply at xfce.org
Mon Sep 5 22:18:02 CEST 2011


Updating branch refs/heads/master
         to afac898c273b96ef4e3e0f9b670179067ff6e81a (commit)
       from f1023ea06564834a3dc1e93c4581a27c79cdb009 (commit)

commit afac898c273b96ef4e3e0f9b670179067ff6e81a
Author: Peter de Ridder <peter at xfce.org>
Date:   Mon Sep 5 22:16:29 2011 +0200

    Support multi line parsing with pcre.

 libsqueeze/pcre-parser.c |   48 ++++++++++++++++++++++++++++++++++++++++-----
 1 files changed, 42 insertions(+), 6 deletions(-)

diff --git a/libsqueeze/pcre-parser.c b/libsqueeze/pcre-parser.c
index 49d11b5..892c8f7 100644
--- a/libsqueeze/pcre-parser.c
+++ b/libsqueeze/pcre-parser.c
@@ -47,6 +47,8 @@ struct _type_parser
 struct _LSQPcreParserContext
 {
     LSQParserContext parent;
+
+    gchar *lines;
 };
 
 struct _LSQPcreParserContextClass
@@ -66,6 +68,8 @@ struct _LSQPcreParser
     type_parser *types_list;
 
     int filename_index;
+
+    gboolean multiline;
 };
 
 struct _LSQPcreParserClass
@@ -204,6 +208,11 @@ build_parser ( LSQPcreParser *parser, const gchar *parser_string, gchar **parser
     gchar **iter;
     gchar *name;
 
+    /* TODO: Should we use g_strstr instead? */
+    /* If we want to support multiline matching without the (?m) flag we need to remove the starting lines one by one if no match was found.
+     * This is not too difficult: we could just pass the middle of the string to pcre_exec, which would also improve speed. */
+    parser->multiline = g_str_has_prefix( parser_string, "(?m)" );
+
     /* Compile the regex */
     parser->parser = pcre_compile(
             parser_string,
@@ -438,8 +447,9 @@ static void
 lsq_pcre_parser_parse ( LSQPcreParser *parser, LSQPcreParserContext *ctx )
 {
     gchar *line;
+    gchar *lines;
     gsize line_length;
-    int ovector[30];
+    int ovector[60];
     int match_count;
     const char *string;
     guint i = 0;
@@ -447,33 +457,59 @@ lsq_pcre_parser_parse ( LSQPcreParser *parser, LSQPcreParserContext *ctx )
     LSQArchive *archive;
     LSQArchiveIter *iter;
     int start, end;
+    int options = 0;
 
     if ( FALSE == lsq_parser_context_get_line( LSQ_PARSER_CONTEXT( ctx ), &line, &line_length ) )
     {
         return;
     }
 
+    if ( FALSE != parser->multiline )
+    {
+        options |= PCRE_PARTIAL_SOFT;
+
+        if ( NULL != ctx->lines )
+	{
+	    line_length += strlen( ctx->lines );
+
+	    /* TODO: use some big buffer to prevent allocation? */
+	    lines = g_strconcat( ctx->lines, line, NULL );
+	    g_free (ctx->lines);
+	    ctx->lines = NULL;
+
+	    g_free( line );
+	    line = lines;
+	}
+    }
+
     /* Run the regex */
+    /* TODO: Switch to pcre_dfa_exec for better performance? */
     match_count = pcre_exec(
             parser->parser,
             parser->study,
             line,
             line_length,
             0,
-            0,
+            options,
             ovector,
-            30
+            60
         );
 
-    if ( 0 > match_count )
+    if ( PCRE_ERROR_PARTIAL == match_count )
+    {
+	/* TODO: could store the partial match location for speed improvement and decrease memory consumption */
+	ctx->lines = line;
+	line = NULL;
+    }
+    else if ( 0 > match_count )
     {
         g_debug( "prce error: %d", match_count );
     }
-    if ( 0 == match_count )
+    else if ( 0 == match_count )
     {
         g_debug( "prce out of match space" );
     }
-    if ( 0 < match_count )
+    else if ( 0 < match_count )
     {
         /* Get the filename */
         pcre_get_substring(


More information about the Xfce4-commits mailing list