[Xfce4-commits] <squeeze:master> Implemented basic pcre parser
Peter de Ridder
noreply at xfce.org
Mon Aug 29 21:48:01 CEST 2011
Updating branch refs/heads/master
to dbb10d292de27557581341115a1f40d97be7cf4f (commit)
from 4aa0e6ed2d2c02a7b3d49f6485e7e7a90d55d0c4 (commit)
commit dbb10d292de27557581341115a1f40d97be7cf4f
Author: Peter de Ridder <peter at xfce.org>
Date: Mon Aug 29 21:45:09 2011 +0200
Implemented basic pcre parser
configure.in.in | 16 +-
libsqueeze/Makefile.am | 4 +
libsqueeze/archive-iter.c | 6 +-
libsqueeze/pcre-parser.c | 511 +++++++++++++++++++++++++++++++++++++++++++
libsqueeze/pcre-parser.h | 59 +++++
libsqueeze/support-reader.c | 16 ++
6 files changed, 598 insertions(+), 14 deletions(-)
diff --git a/configure.in.in b/configure.in.in
index a647a87..fd5b3c2 100644
--- a/configure.in.in
+++ b/configure.in.in
@@ -83,19 +83,13 @@ AC_ARG_ENABLE([pcre],
AC_HELP_STRING([--disable-pcre], [Disable pcre]),
[check_for_pcre=$enableval], [check_for_pcre=yes])
if test x"$check_for_pcre" = x"yes"; then
- AC_CHECK_LIB([pcre], [main],[found_pcre=yes] , [found_pcre=no])
- AC_MSG_CHECKING([for optional package pcre])
- if test x"$found_pcre" = x"yes"; then
- AC_MSG_RESULT([enabled])
- AC_DEFINE([HAVE_PCRE], [1], [Define if libpcre is present])
- else
- AC_MSG_RESULT([disabled])
+ AC_CHECK_HEADERS([pcre.h])
+ AC_CHECK_LIB([pcre], [main])
+ if test x"$ac_cv_header_pcre_h" = x"yes" -a x"$ac_cv_lib_pcre_main" = x"yes"; then
+ AC_DEFINE([HAVE_PCRE], [1], [Define if libpcre is present])
fi
-else
- AC_MSG_CHECKING([for optional package pcre])
- AC_MSG_RESULT([disabled])
fi
-AM_CONDITIONAL([HAVE_PCRE], [test x"$found_git" = x"yes"])
+AM_CONDITIONAL([HAVE_PCRE], [test x"$ac_cv_header_pcre_h" = x"yes" -a x"$ac_cv_lib_pcre_main" = x"yes"])
AC_ARG_ENABLE([pathbar],
diff --git a/libsqueeze/Makefile.am b/libsqueeze/Makefile.am
index bc987c6..51cc350 100644
--- a/libsqueeze/Makefile.am
+++ b/libsqueeze/Makefile.am
@@ -18,6 +18,10 @@ libsqueeze_2_la_SOURCES = \
support-template.c support-template.h \
support-factory.c support-factory.h
+if HAVE_PCRE
+libsqueeze_2_la_SOURCES += pcre-parser.c pcre-parser.h
+endif
+
# archive-command.c archive-command.h
# spawn-command.c spawn-command.h
# macro-command.c macro-command.h
diff --git a/libsqueeze/archive-iter.c b/libsqueeze/archive-iter.c
index ba3a73f..8a7154e 100644
--- a/libsqueeze/archive-iter.c
+++ b/libsqueeze/archive-iter.c
@@ -696,13 +696,13 @@ lsq_archive_iter_set_prop(LSQArchiveIter *iter, guint n, gconstpointer value)
switch(lsq_archive_get_entry_property_type(iter->archive, n))
{
case G_TYPE_STRING:
- lsq_archive_entry_set_prop_str(iter->archive, iter->entry, n, g_value_get_string(value));
+ lsq_archive_entry_set_prop_str(iter->archive, iter->entry, n, value);
break;
case G_TYPE_UINT:
- lsq_archive_entry_set_prop_uint(iter->archive, iter->entry, n, g_value_get_uint(value));
+ lsq_archive_entry_set_prop_uint(iter->archive, iter->entry, n, *(const guint*)value);
break;
case G_TYPE_UINT64:
- lsq_archive_entry_set_prop_uint64(iter->archive, iter->entry, n, g_value_get_uint64(value));
+ lsq_archive_entry_set_prop_uint64(iter->archive, iter->entry, n, *(const guint64*)value);
break;
}
}
diff --git a/libsqueeze/pcre-parser.c b/libsqueeze/pcre-parser.c
new file mode 100644
index 0000000..49d11b5
--- /dev/null
+++ b/libsqueeze/pcre-parser.c
@@ -0,0 +1,511 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+#include <config.h>
+#include <string.h>
+#include <glib.h>
+#include <glib/gstdio.h>
+#include <glib-object.h>
+#include <pcre.h>
+
+#include <gio/gio.h>
+
+#include <libxfce4util/libxfce4util.h>
+
+#include "libsqueeze.h"
+#include "archive-iter.h"
+#include "parser-context.h"
+#include "parser.h"
+#include "pcre-parser.h"
+#include "archive.h"
+
+typedef struct _type_parser type_parser;
+typedef struct _LSQPcreParserContext LSQPcreParserContext;
+typedef struct _LSQPcreParserContextClass LSQPcreParserContextClass;
+
+typedef void (*LSQParseFunc)( gchar*, guint, LSQArchiveIter*, guint );
+
+struct _type_parser
+{
+ int index; /* number of the named submatch, as returned by pcre_get_stringnumber() */
+ LSQParseFunc function; /* converts the matched text and stores it as a property on the iter */
+};
+
+struct _LSQPcreParserContext
+{
+ LSQParserContext parent;
+};
+
+struct _LSQPcreParserContextClass
+{
+ LSQParserContextClass parent;
+};
+
+GType lsq_pcre_parser_context_get_type ( void );
+
+struct _LSQPcreParser
+{
+ LSQParser parent;
+
+ pcre *parser; /* regex compiled by pcre_compile() in build_parser() */
+ pcre_extra *study; /* result of pcre_study(), handed to pcre_exec() */
+
+ type_parser *types_list; /* one entry per element of the parser_types list */
+
+ int filename_index; /* number of the submatch named "F", used as the filename */
+};
+
+struct _LSQPcreParserClass
+{
+ LSQParserClass parent;
+};
+
+G_DEFINE_TYPE( LSQPcreParserContext, lsq_pcre_parser_context, LSQ_TYPE_PARSER_CONTEXT );
+
+static void
+lsq_pcre_parser_context_init ( LSQPcreParserContext *self )
+{
+}
+
+static void
+lsq_pcre_parser_context_class_init ( LSQPcreParserContextClass *klass )
+{
+}
+
+static LSQParserContext *
+lsq_pcre_parser_context_new ( LSQPcreParser *parser, LSQArchive *archive )
+{
+ LSQPcreParserContext *ctx;
+
+ ctx = g_object_new( lsq_pcre_parser_context_get_type(), "archive", archive, NULL );
+
+ return LSQ_PARSER_CONTEXT( ctx );
+}
+
+static void build_parser ( LSQPcreParser *, const gchar *, gchar ** );
+
+static void lsq_pcre_parser_parse ( LSQPcreParser *, LSQPcreParserContext * );
+
+G_DEFINE_TYPE( LSQPcreParser, lsq_pcre_parser, LSQ_TYPE_PARSER );
+
+static void
+lsq_pcre_parser_init ( LSQPcreParser *self )
+{
+}
+
+static void
+lsq_pcre_parser_class_init ( LSQPcreParserClass *klass )
+{
+ LSQParserClass *parser_class = LSQ_PARSER_CLASS( klass );
+ parser_class->get_context = (LSQParserContext*(*)(LSQParser*,LSQArchive*))lsq_pcre_parser_context_new;
+ parser_class->parse = (void(*)(LSQParser*,LSQParserContext*))lsq_pcre_parser_parse;
+}
+
+LSQParser *
+lsq_pcre_parser_new ( const gchar *parser_string, gchar **parser_types )
+{ /* parser_string: the regex; parser_types: NULL-terminated list of "<submatch name>=<scanf type>" */
+ LSQPcreParser *parser;
+
+ parser = g_object_new( LSQ_TYPE_PCRE_PARSER, NULL );
+
+ /* Build the parser based on the provided configuration */
+ build_parser( parser, parser_string, parser_types );
+
+ return LSQ_PARSER( parser );
+}
+
+#define DEF_PARSE_NUM(func, base, type) \
+static void parse_##func(gchar *str, guint lng, LSQArchiveIter *iter, guint n) { \
+ type val; \
+ val = g_ascii_strtoll( str, NULL, base ); \
+ lsq_archive_iter_set_prop( iter, n, &val ); \
+}
+
+#define DEF_PARSE_FLOAT(func, type) \
+static void parse_##func(gchar *str, guint lng, LSQArchiveIter *iter, guint n) { \
+ type val; \
+ val = g_ascii_strtod(str, NULL); \
+ lsq_archive_iter_set_prop( iter, n, &val ); \
+}
+
+#define DEF_PARSE_UNS(func, base, type) \
+static void parse_##func(gchar *str, guint lng, LSQArchiveIter *iter, guint n) { \
+ type val; \
+ val = g_ascii_strtoull( str, NULL, base ); \
+ lsq_archive_iter_set_prop( iter, n, &val ); \
+}
+
+static void
+parse_char( gchar *str, guint lng, LSQArchiveIter *iter, guint n )
+{
+ gchar val;
+
+ /* Read a single character in the character parser */
+ val = *str;
+
+ lsq_archive_iter_set_prop( iter, n, &val );
+}
+
+DEF_PARSE_NUM(decimal, 10, gint)
+DEF_PARSE_NUM(decimal16, 10, gint)
+DEF_PARSE_NUM(decimal32, 10, glong)
+DEF_PARSE_NUM(decimal64, 10, gint64)
+
+DEF_PARSE_FLOAT(floatingpoint, gfloat)
+DEF_PARSE_FLOAT(double, gdouble)
+
+DEF_PARSE_UNS(octal, 010, guint)
+DEF_PARSE_UNS(octal16, 010, guint)
+DEF_PARSE_UNS(octal32, 010, gulong)
+DEF_PARSE_UNS(octal64, 010, guint64)
+
+static void
+parse_string( gchar *str, guint lng, LSQArchiveIter *iter, guint n )
+{
+ gchar *val;
+
+ /* Create a copy of the string part */
+ val = g_strndup( str, lng );
+
+ lsq_archive_iter_set_prop( iter, n, val );
+
+ g_free( val );
+}
+
+DEF_PARSE_UNS(unsigned, 10, guint)
+DEF_PARSE_UNS(unsigned16, 10, guint)
+DEF_PARSE_UNS(unsigned32, 10, gulong)
+DEF_PARSE_UNS(unsigned64, 10, guint64)
+
+DEF_PARSE_UNS(hexadecimal, 0x10, guint)
+DEF_PARSE_UNS(hexadecimal16, 0x10, guint)
+DEF_PARSE_UNS(hexadecimal32, 0x10, gulong)
+DEF_PARSE_UNS(hexadecimal64, 0x10, guint64)
+
+static void
+build_parser ( LSQPcreParser *parser, const gchar *parser_string, gchar **parser_types )
+{
+ const char *error;
+ int error_pos;
+ gint i = 0;
+ gchar **iter;
+ gchar *name;
+
+ /* Compile the regex */
+ parser->parser = pcre_compile(
+ parser_string,
+ PCRE_DUPNAMES | PCRE_NO_AUTO_CAPTURE,
+ &error,
+ &error_pos,
+ NULL
+ );
+
+ if ( NULL == parser->parser )
+ {
+ g_error( "%s at %d in '%s'", error, error_pos, parser_string );
+ return;
+ }
+
+ /* Study the regex for optimizations */
+ parser->study = pcre_study(
+ parser->parser,
+ 0,
+ &error
+ );
+
+ if ( NULL != error )
+ {
+ g_error( "%s during study of '%s'", error, parser_string );
+ }
+
+ parser->filename_index = pcre_get_stringnumber( parser->parser, "F" );
+
+ /* Create a list for type conversion for the found substrings */
+ parser->types_list = g_new( type_parser, g_strv_length( parser_types ) );
+
+ for ( iter = parser_types; *iter; ++iter, ++i)
+ {
+ gchar *ptr;
+ GType type = G_TYPE_INVALID;
+ type_parser *type_iter;
+ gchar ch;
+ enum { /* size modifier of the scanf-style type */
+ SIZE_NORMAL, /* no modifier */
+ SIZE_SHORT, /* 'h' */
+ SIZE_LONG, /* 'l' */
+ SIZE_LONGLONG /* 'll' or 'L' */
+ } size_flag = SIZE_NORMAL; /* must start as "no modifier"; it is only reassigned when a modifier is read */
+
+ type_iter = &parser->types_list[i];
+
+ /* Each list entry has the following syntax:
+ * <submatch name>=<scanf type>
+ */
+ ptr = strchr( *iter, '=' );
+ if ( NULL == ptr )
+ {
+ return;
+ }
+
+ /* Store the index of the <submatch name> to retrieve the value during parsing */
+ name = g_strndup( *iter, ptr - *iter );
+ type_iter->index = pcre_get_stringnumber( parser->parser, name );
+ g_free( name );
+
+ ++ptr; /* Move past the '=' */
+ ch = *ptr++; /* read the first character of the scanf pattern */
+
+ /* Check for size flags */
+ switch ( ch )
+ {
+ case 'h':
+ size_flag = SIZE_SHORT;
+ ch = *ptr++;
+ break;
+
+ case 'l':
+ size_flag = SIZE_LONG;
+ ch = *ptr++;
+ if('l' != ch) /* ll equals L */
+ break;
+ case 'L':
+ size_flag = SIZE_LONGLONG;
+ ch = *ptr++;
+ break;
+ }
+
+ /* Check the type flag */
+ switch ( ch )
+ {
+ case 'c': /* Single character */
+ g_return_if_fail( SIZE_NORMAL == size_flag );
+ type_iter->function = parse_char;
+ type = G_TYPE_CHAR;
+ break;
+
+ case 'd': /* Decimal */
+ case 'i': /* Integer */
+ switch( size_flag )
+ {
+ case SIZE_NORMAL:
+ type_iter->function = parse_decimal;
+ type = G_TYPE_INT;
+ break;
+
+ case SIZE_SHORT:
+ type_iter->function = parse_decimal16;
+ type = G_TYPE_INT;
+ break;
+
+ case SIZE_LONG:
+ type_iter->function = parse_decimal32;
+ type = G_TYPE_LONG;
+ break;
+
+ case SIZE_LONGLONG:
+ type_iter->function = parse_decimal64;
+ type = G_TYPE_INT64;
+ break;
+ }
+ break;
+
+ case 'f': /* Floating point */
+ g_return_if_fail( SIZE_NORMAL == size_flag || SIZE_LONGLONG == size_flag );
+ switch(size_flag)
+ {
+ case SIZE_NORMAL:
+ type_iter->function = parse_floatingpoint;
+ type = G_TYPE_FLOAT;
+ break;
+
+ case SIZE_LONGLONG:
+ type_iter->function = parse_double;
+ type = G_TYPE_DOUBLE;
+ break;
+
+ default:
+ break;
+ }
+ break;
+
+ case 'o': /* Octal unsigned integer */
+ switch(size_flag)
+ {
+ case SIZE_NORMAL:
+ type_iter->function = parse_octal;
+ type = G_TYPE_UINT;
+ break;
+
+ case SIZE_SHORT:
+ type_iter->function = parse_octal16;
+ type = G_TYPE_UINT;
+ break;
+
+ case SIZE_LONG:
+ type_iter->function = parse_octal32;
+ type = G_TYPE_ULONG;
+ break;
+
+ case SIZE_LONGLONG:
+ type_iter->function = parse_octal64;
+ type = G_TYPE_UINT64;
+ break;
+ }
+ break;
+
+ case 's': /* String */
+ type_iter->function = parse_string;
+ type = G_TYPE_STRING;
+ break;
+
+ case 'u': /* Unsigned integer */
+ switch(size_flag)
+ {
+ case SIZE_NORMAL:
+ type_iter->function = parse_unsigned;
+ type = G_TYPE_UINT;
+ break;
+
+ case SIZE_SHORT:
+ type_iter->function = parse_unsigned16;
+ type = G_TYPE_UINT;
+ break;
+
+ case SIZE_LONG:
+ type_iter->function = parse_unsigned32;
+ type = G_TYPE_ULONG;
+ break;
+
+ case SIZE_LONGLONG:
+ type_iter->function = parse_unsigned64;
+ type = G_TYPE_UINT64;
+ break;
+ }
+ break;
+
+ case 'x': /* Hexadecimal lowercase */
+ case 'X': /* Hexadecimal uppercase */
+ switch(size_flag)
+ {
+ case SIZE_NORMAL:
+ type_iter->function = parse_hexadecimal;
+ type = G_TYPE_UINT;
+ break;
+
+ case SIZE_SHORT:
+ type_iter->function = parse_hexadecimal16;
+ type = G_TYPE_UINT;
+ break;
+
+ case SIZE_LONG:
+ type_iter->function = parse_hexadecimal32;
+ type = G_TYPE_ULONG;
+ break;
+
+ case SIZE_LONGLONG:
+ type_iter->function = parse_hexadecimal64;
+ type = G_TYPE_UINT64;
+ break;
+ }
+ break;
+
+ default:
+ g_return_if_reached();
+ }
+
+ g_return_if_fail( G_TYPE_INVALID != type );
+
+ lsq_parser_set_property_type( LSQ_PARSER( parser ), i, type );
+ }
+
+ g_return_if_fail( lsq_parser_n_properties( LSQ_PARSER( parser ) ) == g_strv_length( parser_types ) );
+}
+
+/*
+ * Match one line against the regex and add the file named by the "F" group,
+ * with every subfield that took part in the match, to the archive of ctx.
+ */
+static void
+lsq_pcre_parser_parse ( LSQPcreParser *parser, LSQPcreParserContext *ctx )
+{
+	gchar *line;
+	gsize line_length;
+	int ovector[30];
+	int match_count;
+	const char *string = NULL;
+	guint i;
+	int index_;
+	LSQArchive *archive;
+	LSQArchiveIter *iter;
+
+	if ( FALSE == lsq_parser_context_get_line( LSQ_PARSER_CONTEXT( ctx ), &line, &line_length ) )
+	{
+		return;
+	}
+
+	/* Run the regex */
+	match_count = pcre_exec( parser->parser, parser->study, line, line_length,
+			0, 0, ovector, G_N_ELEMENTS( ovector ) );
+
+	if ( 0 > match_count )
+	{
+		g_debug( "pcre error: %d", match_count );
+	}
+	else if ( 0 == match_count )
+	{
+		/* pcre_exec() returns 0 when ovector is too small to hold all submatches */
+		g_debug( "pcre out of match space" );
+	}
+	else if ( 0 > pcre_get_substring( line, ovector, match_count, parser->filename_index, &string ) )
+	{
+		/* No captured "F" submatch (or out of memory): there is no filename to add */
+		g_debug( "pcre match without filename" );
+	}
+	else
+	{
+		archive = LSQ_PARSER_CONTEXT( ctx )->archive;
+
+		/* Add the file to the archive */
+		iter = lsq_archive_add_file( archive, string );
+
+		pcre_free_substring( string );
+
+		/* Get the values of all the subfields */
+		for ( i = 0; i < lsq_parser_n_properties( LSQ_PARSER( parser ) ); ++i )
+		{
+			index_ = parser->types_list[i].index;
+
+			/* Skip submatches that are unknown, beyond the captured range or unset */
+			if ( 0 > index_ || index_ >= match_count || 0 > ovector[index_ * 2] )
+			{
+				continue;
+			}
+
+			/* Parse the subfield */
+			parser->types_list[i].function(
+					line + ovector[index_ * 2],
+					ovector[( index_ * 2 ) + 1] - ovector[index_ * 2],
+					iter,
+					LSQ_ARCHIVE_PROP_USER + i
+				);
+		}
+
+		lsq_archive_iter_unref( iter );
+	}
+
+	g_free( line );
+}
+
diff --git a/libsqueeze/pcre-parser.h b/libsqueeze/pcre-parser.h
new file mode 100644
index 0000000..61b98a5
--- /dev/null
+++ b/libsqueeze/pcre-parser.h
@@ -0,0 +1,59 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Library General Public License for more details.
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+#ifndef __LIBSQUEEZE_PCRE_PARSER_H__
+#define __LIBSQUEEZE_PCRE_PARSER_H__
+
+G_BEGIN_DECLS
+
+#define LSQ_TYPE_PCRE_PARSER lsq_pcre_parser_get_type()
+
+#define LSQ_PCRE_PARSER(obj) ( \
+ G_TYPE_CHECK_INSTANCE_CAST ((obj), \
+ LSQ_TYPE_PCRE_PARSER, \
+ LSQPcreParser))
+
+#define LSQ_IS_PCRE_PARSER(obj) ( \
+ G_TYPE_CHECK_INSTANCE_TYPE ((obj), \
+ LSQ_TYPE_PCRE_PARSER))
+
+#define LSQ_PCRE_PARSER_CLASS(klass) ( \
+ G_TYPE_CHECK_CLASS_CAST ((klass), \
+ LSQ_TYPE_PCRE_PARSER, \
+ LSQPcreParserClass))
+
+#define LSQ_IS_PCRE_PARSER_CLASS(klass) ( \
+ G_TYPE_CHECK_CLASS_TYPE ((klass), \
+ LSQ_TYPE_PCRE_PARSER))
+
+#define LSQ_PCRE_PARSER_GET_CLASS(obj) ( \
+ G_TYPE_INSTANCE_GET_CLASS ((obj), \
+ LSQ_TYPE_PCRE_PARSER, \
+ LSQPcreParserClass))
+
+
+typedef struct _LSQPcreParser LSQPcreParser;
+
+typedef struct _LSQPcreParserClass LSQPcreParserClass;
+
+GType
+lsq_pcre_parser_get_type ( void );
+
+LSQParser *
+lsq_pcre_parser_new ( const gchar * , gchar ** );
+
+G_END_DECLS
+
+#endif /* __LIBSQUEEZE_PCRE_PARSER_H__ */
diff --git a/libsqueeze/support-reader.c b/libsqueeze/support-reader.c
index 150cf56..5032ec7 100644
--- a/libsqueeze/support-reader.c
+++ b/libsqueeze/support-reader.c
@@ -32,6 +32,9 @@
#include "parser-context.h"
#include "parser.h"
#include "scanf-parser.h"
+#ifdef HAVE_PCRE
+#include "pcre-parser.h"
+#endif
#include "command-queue.h"
#include "support-reader.h"
@@ -134,7 +137,11 @@ lsq_support_reader_parse_file(const gchar *filename)
gchar **column_names;
LSQParser *parser = NULL;
const gchar *parser_string;
+ const gchar *parser_regex;
gchar **_mime_types;
+#ifdef HAVE_PCRE
+ gchar **regex_types;
+#endif
XfceRc *rc = xfce_rc_simple_open(filename, TRUE);
@@ -194,11 +201,20 @@ lsq_support_reader_parse_file(const gchar *filename)
xfce_rc_set_group(rc, "Squeeze-Refresh");
column_names = xfce_rc_read_list_entry(rc, "X-Squeeze-Headers", ";");
parser_string = xfce_rc_read_entry(rc, "X-Squeeze-Parse", NULL);
+ parser_regex = xfce_rc_read_entry(rc, "X-Squeeze-Parse-Regex", NULL);
if (NULL != parser_string)
{
parser = lsq_scanf_parser_new(parser_string);
}
+#ifdef HAVE_PCRE
+ else if ( NULL != parser_regex )
+ {
+ regex_types = xfce_rc_read_list_entry(rc, "X-Squeeze-Types", ";");
+ parser = lsq_pcre_parser_new( parser_regex, regex_types );
+ g_strfreev( regex_types );
+ }
+#endif
_mime_types = mime_types;
for(i = 0; _mime_types[i]; ++i)
More information about the Xfce4-commits
mailing list