[Xfce4-commits] <squeeze:master> Support multi line parsing with pcre.
Peter de Ridder
noreply at xfce.org
Mon Sep 5 22:18:02 CEST 2011
Updating branch refs/heads/master
to afac898c273b96ef4e3e0f9b670179067ff6e81a (commit)
from f1023ea06564834a3dc1e93c4581a27c79cdb009 (commit)
commit afac898c273b96ef4e3e0f9b670179067ff6e81a
Author: Peter de Ridder <peter at xfce.org>
Date: Mon Sep 5 22:16:29 2011 +0200
Support multi line parsing with pcre.
libsqueeze/pcre-parser.c | 48 ++++++++++++++++++++++++++++++++++++++++-----
1 files changed, 42 insertions(+), 6 deletions(-)
diff --git a/libsqueeze/pcre-parser.c b/libsqueeze/pcre-parser.c
index 49d11b5..892c8f7 100644
--- a/libsqueeze/pcre-parser.c
+++ b/libsqueeze/pcre-parser.c
@@ -47,6 +47,8 @@ struct _type_parser
struct _LSQPcreParserContext
{
LSQParserContext parent;
+
+ gchar *lines;
};
struct _LSQPcreParserContextClass
@@ -66,6 +68,8 @@ struct _LSQPcreParser
type_parser *types_list;
int filename_index;
+
+ gboolean multiline;
};
struct _LSQPcreParserClass
@@ -204,6 +208,11 @@ build_parser ( LSQPcreParser *parser, const gchar *parser_string, gchar **parser
gchar **iter;
gchar *name;
+ /* TODO: Should we use g_strstr instead? */
+ /* If we want to support multiline matching without the (?m) flag we need to remove the starting lines one by one if no match was found.
+ * This is not too difficult: we could just pass the middle of the string to pcre_exec, which would also improve speed. */
+ parser->multiline = g_str_has_prefix( parser_string, "(?m)" );
+
/* Compile the regex */
parser->parser = pcre_compile(
parser_string,
@@ -438,8 +447,9 @@ static void
lsq_pcre_parser_parse ( LSQPcreParser *parser, LSQPcreParserContext *ctx )
{
gchar *line;
+ gchar *lines;
gsize line_length;
- int ovector[30];
+ int ovector[60];
int match_count;
const char *string;
guint i = 0;
@@ -447,33 +457,59 @@ lsq_pcre_parser_parse ( LSQPcreParser *parser, LSQPcreParserContext *ctx )
LSQArchive *archive;
LSQArchiveIter *iter;
int start, end;
+ int options = 0;
if ( FALSE == lsq_parser_context_get_line( LSQ_PARSER_CONTEXT( ctx ), &line, &line_length ) )
{
return;
}
+ if ( FALSE != parser->multiline )
+ {
+ options |= PCRE_PARTIAL_SOFT;
+
+ if ( NULL != ctx->lines )
+ {
+ line_length += strlen( ctx->lines );
+
+ /* TODO: use some big buffer to prevent allocation? */
+ lines = g_strconcat( ctx->lines, line, NULL );
+ g_free (ctx->lines);
+ ctx->lines = NULL;
+
+ g_free( line );
+ line = lines;
+ }
+ }
+
/* Run the regex */
+ /* TODO: Switch to pcre_dfa_exec for better performance? */
match_count = pcre_exec(
parser->parser,
parser->study,
line,
line_length,
0,
- 0,
+ options,
ovector,
- 30
+ 60
);
- if ( 0 > match_count )
+ if ( PCRE_ERROR_PARTIAL == match_count )
+ {
+ /* TODO: could store the partial match location for speed improvement and decrease memory consumption */
+ ctx->lines = line;
+ line = NULL;
+ }
+ else if ( 0 > match_count )
{
g_debug( "prce error: %d", match_count );
}
- if ( 0 == match_count )
+ else if ( 0 == match_count )
{
g_debug( "prce out of match space" );
}
- if ( 0 < match_count )
+ else if ( 0 < match_count )
{
/* Get the filename */
pcre_get_substring(
More information about the Xfce4-commits
mailing list