2007-12-30 17:41:49 +01:00
|
|
|
/***************************************************************************
|
|
|
|
|
2011-12-31 03:39:20 +01:00
|
|
|
gbx_regexp.c
|
2007-12-30 17:41:49 +01:00
|
|
|
|
2018-02-12 02:53:46 +01:00
|
|
|
(c) 2000-2017 Benoît Minisini <g4mba5@gmail.com>
|
2007-12-30 17:41:49 +01:00
|
|
|
|
2011-12-31 03:39:20 +01:00
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
|
|
it under the terms of the GNU General Public License as published by
|
|
|
|
the Free Software Foundation; either version 2, or (at your option)
|
|
|
|
any later version.
|
2007-12-30 17:41:49 +01:00
|
|
|
|
2011-12-31 03:39:20 +01:00
|
|
|
This program is distributed in the hope that it will be useful,
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
GNU General Public License for more details.
|
2007-12-30 17:41:49 +01:00
|
|
|
|
2011-12-31 03:39:20 +01:00
|
|
|
You should have received a copy of the GNU General Public License
|
|
|
|
along with this program; if not, write to the Free Software
|
|
|
|
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
|
|
|
MA 02110-1301, USA.
|
2007-12-30 17:41:49 +01:00
|
|
|
|
|
|
|
***************************************************************************/
|
|
|
|
|
|
|
|
#define __GBX_REGEXP_C
|
|
|
|
|
|
|
|
#include "gb_common.h"
|
|
|
|
#include "gb_common_case.h"
|
|
|
|
|
|
|
|
#include <ctype.h>
|
|
|
|
|
|
|
|
#include "gb_alloc.h"
|
|
|
|
#include "gb_array.h"
|
|
|
|
#include "gb_error.h"
|
|
|
|
#include "gbx_c_array.h"
|
|
|
|
#include "gbx_api.h"
|
2017-10-21 01:19:27 +02:00
|
|
|
#include "gb.pcre.h"
|
2007-12-30 17:41:49 +01:00
|
|
|
|
|
|
|
#include "gbx_regexp.h"
|
|
|
|
|
|
|
|
|
|
|
|
/* Capture callback: when non-NULL, REGEXP_match() calls it with the substring
   consumed by each '*' wildcard. REGEXP_scan() installs add_string() here for
   the duration of one scan and resets it to NULL afterwards. */
static REGEXP_SCAN_FUNC _scan_cb = NULL;
|
|
|
|
/* Array that add_string() appends captured substrings to. Set by REGEXP_scan()
   before matching and reset to NULL once the match is done. */
static CARRAY *_scan_array;
|
|
|
|
|
2017-10-21 01:19:27 +02:00
|
|
|
/* Interface of the "gb.pcre" component; filled in by init_pcre() and used by
   REGEXP_match_pcre(). */
static PCRE_INTERFACE PCRE;
|
|
|
|
|
|
|
|
static void init_pcre()
|
|
|
|
{
|
|
|
|
static bool init = FALSE;
|
|
|
|
|
|
|
|
if (init)
|
|
|
|
return;
|
|
|
|
|
|
|
|
COMPONENT_load(COMPONENT_create("gb.pcre"));
|
|
|
|
LIBRARY_get_interface_by_name("gb.pcre", PCRE_INTERFACE_VERSION, &PCRE);
|
|
|
|
init = TRUE;
|
|
|
|
}
|
|
|
|
|
2008-01-17 22:39:26 +01:00
|
|
|
bool REGEXP_match(const char *pattern, int len_pattern, const char *string, int len_string)
|
2007-12-30 17:41:49 +01:00
|
|
|
{
|
2010-12-29 16:25:29 +01:00
|
|
|
unsigned char cp;
|
|
|
|
unsigned char cs;
|
|
|
|
|
2014-05-23 13:52:04 +02:00
|
|
|
#define _next_pattern() (cp = *pattern++, len_pattern--)
|
|
|
|
#define _next_string(void) (cs = *string++, len_string--)
|
2010-12-29 16:25:29 +01:00
|
|
|
|
|
|
|
/*if (len_pattern == 0 || len_string == 0)
|
|
|
|
return FALSE;*/
|
|
|
|
|
|
|
|
for(;;)
|
|
|
|
{
|
|
|
|
if (len_pattern == 0)
|
|
|
|
return (len_string == 0);
|
|
|
|
|
|
|
|
_next_pattern();
|
|
|
|
|
|
|
|
if (cp == '*')
|
|
|
|
{
|
|
|
|
const char *p;
|
|
|
|
|
|
|
|
if (len_pattern == 0)
|
|
|
|
{
|
|
|
|
if (_scan_cb)
|
|
|
|
(*_scan_cb)(string, len_string);
|
|
|
|
return TRUE;
|
2007-12-30 17:41:49 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
p = string;
|
|
|
|
|
2010-12-29 16:25:29 +01:00
|
|
|
for(;;)
|
|
|
|
{
|
|
|
|
if (REGEXP_match(pattern, len_pattern, string, len_string))
|
|
|
|
{
|
|
|
|
if (_scan_cb)
|
|
|
|
(*_scan_cb)(p, string - p);
|
|
|
|
return TRUE;
|
2007-12-30 17:41:49 +01:00
|
|
|
}
|
2010-12-29 16:25:29 +01:00
|
|
|
if (len_string == 0)
|
|
|
|
return FALSE;
|
|
|
|
_next_string();
|
|
|
|
}
|
|
|
|
return FALSE;
|
|
|
|
}
|
2007-12-30 17:41:49 +01:00
|
|
|
|
2010-12-29 16:25:29 +01:00
|
|
|
if (len_string == 0)
|
|
|
|
return FALSE; /*end || (len_pattern == 0);*/
|
2007-12-30 17:41:49 +01:00
|
|
|
|
2010-12-29 16:25:29 +01:00
|
|
|
_next_string();
|
2007-12-30 17:41:49 +01:00
|
|
|
|
2010-12-29 16:25:29 +01:00
|
|
|
if (cp == '?')
|
|
|
|
continue;
|
2007-12-30 17:41:49 +01:00
|
|
|
|
2010-12-29 16:25:29 +01:00
|
|
|
if (cp == '[' && len_pattern > 0)
|
|
|
|
{
|
|
|
|
bool not = FALSE;
|
|
|
|
bool in = FALSE;
|
|
|
|
unsigned char cb = 0;
|
2007-12-30 17:41:49 +01:00
|
|
|
|
2010-12-29 16:25:29 +01:00
|
|
|
_next_pattern();
|
|
|
|
if (cp == '^')
|
|
|
|
{
|
|
|
|
not = TRUE;
|
|
|
|
_next_pattern();
|
|
|
|
}
|
2007-12-30 17:41:49 +01:00
|
|
|
|
2010-12-29 16:25:29 +01:00
|
|
|
if (cp == cs)
|
2007-12-30 17:41:49 +01:00
|
|
|
{
|
2010-12-29 16:25:29 +01:00
|
|
|
in = TRUE;
|
|
|
|
_next_pattern();
|
2007-12-30 17:41:49 +01:00
|
|
|
}
|
2010-12-29 16:25:29 +01:00
|
|
|
else
|
|
|
|
{
|
|
|
|
for(;;)
|
|
|
|
{
|
|
|
|
if (cp == '-' && len_pattern > 1 && cb && cb != '-')
|
|
|
|
{
|
|
|
|
_next_pattern();
|
|
|
|
if (cb <= cs && cs <= cp)
|
|
|
|
{
|
|
|
|
in = TRUE;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
cb = 0;
|
|
|
|
}
|
|
|
|
else if (cp == cs)
|
|
|
|
{
|
|
|
|
in = TRUE;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
cb = cp;
|
|
|
|
|
|
|
|
_next_pattern();
|
|
|
|
if (cp == ']')
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
for(;;)
|
|
|
|
{
|
|
|
|
if (cp == ']')
|
|
|
|
break;
|
|
|
|
if (len_pattern == 0)
|
|
|
|
THROW(E_REGEXP, "Missing ']'");
|
|
|
|
_next_pattern();
|
|
|
|
}
|
|
|
|
|
|
|
|
if (in ^ not)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
return FALSE;
|
|
|
|
}
|
2009-09-27 11:28:52 +02:00
|
|
|
|
2010-12-29 16:25:29 +01:00
|
|
|
if (cp == ' ')
|
|
|
|
{
|
|
|
|
if (cs > ' ')
|
|
|
|
return FALSE;
|
2009-09-27 11:28:52 +02:00
|
|
|
|
2010-12-29 16:25:29 +01:00
|
|
|
while (len_string && cs <= ' ')
|
|
|
|
_next_string();
|
2009-09-27 11:28:52 +02:00
|
|
|
|
|
|
|
if (cs > ' ')
|
|
|
|
{
|
|
|
|
string--;
|
|
|
|
len_string++;
|
|
|
|
}
|
2014-01-12 02:59:49 +01:00
|
|
|
|
|
|
|
while (len_pattern && cp == ' ')
|
|
|
|
_next_pattern();
|
|
|
|
|
|
|
|
if (cp != ' ')
|
|
|
|
{
|
|
|
|
pattern--;
|
|
|
|
len_pattern++;
|
|
|
|
}
|
|
|
|
|
2009-09-27 11:28:52 +02:00
|
|
|
continue;
|
2010-12-29 16:25:29 +01:00
|
|
|
}
|
2009-09-27 11:28:52 +02:00
|
|
|
|
2015-04-13 19:04:43 +02:00
|
|
|
if (cp == '{')
|
2009-09-27 11:28:52 +02:00
|
|
|
{
|
|
|
|
const char *save_string;
|
|
|
|
int save_len_string;
|
2013-12-31 16:57:20 +01:00
|
|
|
const char *save_pattern;
|
|
|
|
int save_len_pattern;
|
2009-09-27 11:28:52 +02:00
|
|
|
|
|
|
|
string--; len_string++;
|
|
|
|
save_string = string;
|
|
|
|
save_len_string = len_string;
|
2013-12-31 16:57:20 +01:00
|
|
|
|
2013-12-31 22:39:24 +01:00
|
|
|
NEXT_SUB_PATTERN:
|
2013-12-31 16:57:20 +01:00
|
|
|
|
2013-12-31 22:39:24 +01:00
|
|
|
for(;;)
|
|
|
|
{
|
2015-04-13 19:04:43 +02:00
|
|
|
if (len_pattern == 0)
|
|
|
|
goto MISSING_BRACE;
|
2013-12-31 22:39:24 +01:00
|
|
|
_next_pattern();
|
|
|
|
if (cp == ',' || cp == '}')
|
|
|
|
break;
|
|
|
|
_next_string();
|
|
|
|
if (tolower(cp) != tolower(cs))
|
|
|
|
break;
|
|
|
|
}
|
2013-12-31 16:57:20 +01:00
|
|
|
|
2013-12-31 22:39:24 +01:00
|
|
|
if (cp == ',' || cp == '}')
|
|
|
|
{
|
|
|
|
save_pattern = pattern - 1;
|
|
|
|
save_len_pattern = len_pattern + 1;
|
2013-12-31 16:57:20 +01:00
|
|
|
|
|
|
|
while (cp != '}')
|
2009-09-27 11:28:52 +02:00
|
|
|
{
|
2013-12-31 16:57:20 +01:00
|
|
|
if (len_pattern == 0)
|
2015-04-13 19:04:43 +02:00
|
|
|
goto MISSING_BRACE;
|
2013-12-29 20:24:52 +01:00
|
|
|
_next_pattern();
|
2009-09-27 11:28:52 +02:00
|
|
|
}
|
2013-12-29 20:24:52 +01:00
|
|
|
|
2013-12-31 16:57:20 +01:00
|
|
|
if (REGEXP_match(pattern, len_pattern, string, len_string))
|
|
|
|
return TRUE;
|
2013-12-29 20:24:52 +01:00
|
|
|
|
2013-12-31 16:57:20 +01:00
|
|
|
pattern = save_pattern;
|
|
|
|
len_pattern = save_len_pattern;
|
2013-12-31 22:39:24 +01:00
|
|
|
_next_pattern();
|
2009-09-27 11:28:52 +02:00
|
|
|
}
|
2013-12-31 22:39:24 +01:00
|
|
|
|
|
|
|
while (cp != ',')
|
|
|
|
{
|
|
|
|
if (cp == '}')
|
|
|
|
return FALSE;
|
|
|
|
|
2015-04-13 19:04:43 +02:00
|
|
|
if (len_pattern == 0)
|
|
|
|
goto MISSING_BRACE;
|
2013-12-31 22:39:24 +01:00
|
|
|
_next_pattern();
|
|
|
|
}
|
|
|
|
|
|
|
|
string = save_string;
|
|
|
|
len_string = save_len_string;
|
|
|
|
|
|
|
|
goto NEXT_SUB_PATTERN;
|
2009-09-27 11:28:52 +02:00
|
|
|
}
|
2007-12-30 17:41:49 +01:00
|
|
|
|
2010-12-29 16:25:29 +01:00
|
|
|
if (cp == '\\')
|
|
|
|
{
|
|
|
|
if (len_pattern == 0)
|
|
|
|
THROW(E_REGEXP, "Trailing backslash");
|
|
|
|
_next_pattern();
|
|
|
|
}
|
2007-12-30 17:41:49 +01:00
|
|
|
|
2010-12-29 16:25:29 +01:00
|
|
|
if (tolower(cp) != tolower(cs))
|
|
|
|
return FALSE;
|
|
|
|
}
|
2015-04-13 19:04:43 +02:00
|
|
|
|
|
|
|
MISSING_BRACE:
|
|
|
|
|
|
|
|
THROW(E_REGEXP, "Missing '}'");
|
2007-12-30 17:41:49 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2008-01-17 22:39:26 +01:00
|
|
|
static void add_string(const char *str, int len)
|
2007-12-30 17:41:49 +01:00
|
|
|
{
|
|
|
|
char **p = (char **)GB_ArrayAdd((GB_ARRAY)_scan_array);
|
|
|
|
if (len)
|
2010-06-05 01:48:53 +02:00
|
|
|
*p = STRING_new(str, len);
|
2007-12-30 17:41:49 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2008-01-17 22:39:26 +01:00
|
|
|
/*
 * Matches 'string' against 'pattern' like REGEXP_match(), and additionally
 * collects into 'array' the substrings consumed by the '*' wildcards.
 *
 * Returns the result of REGEXP_match().
 *
 * NOTE(review): the capture globals are only reset on the normal return
 * path. If REGEXP_match() throws and the error unwinds past this function,
 * they presumably stay set -- to be confirmed against the error handling.
 */
bool REGEXP_scan(CARRAY *array, const char *pattern, int len_pattern, const char *string, int len_string)
{
	bool found;

	/* Route every wildcard capture made by REGEXP_match() to 'array'. */
	_scan_array = array;
	_scan_cb = add_string;

	found = REGEXP_match(pattern, len_pattern, string, len_string);

	/* REGEXP_match() reports a capture only after the rest of the pattern
	   has matched, so the last wildcard is added first: reverse the array. */
	CARRAY_reverse(array, NULL);

	_scan_array = NULL;
	_scan_cb = NULL;

	return found;
}
|
|
|
|
|
2017-10-21 01:19:27 +02:00
|
|
|
bool REGEXP_match_pcre(const char *pattern, int len_pattern, const char *string, int len_string)
|
|
|
|
{
|
|
|
|
init_pcre();
|
|
|
|
return PCRE.Match(string, len_string, pattern, len_pattern, 0, 0);
|
|
|
|
}
|
|
|
|
|