gambas-source-code/gb.pcre/src/regexp.c

396 lines
9.2 KiB
C
Raw Normal View History

/***************************************************************************
regexp.c
(c) 2004 Rob Kudla <pcre-component@kudla.org>
(c) 2000-2012 Benoît Minisini <gambas@users.sourceforge.net>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
MA 02110-1301, USA.
***************************************************************************/
#define __REGEXP_C
#include "gb_common.h"
#include "regexp.h"
#include "main.h"
#define OVECSIZE_INC 99
/***************************************************************************
Regexp
***************************************************************************/
/*
 * Compile THIS->pattern with the options in THIS->copts and store the result
 * in THIS->code.
 *
 * A previously compiled pattern is released first. When compilation fails,
 * THIS->code is left NULL, THIS->error receives the value pcre_compile()
 * wrote to its error-offset argument, and the PCRE message is raised through
 * GB.Error(). A missing pattern is reported as a Gambas error as well.
 */
static void compile(void *_object)
{
	int errptr;
	const char *errstr;

	if (!THIS->pattern)
	{
		GB.Error("No pattern provided");
		return;
	}

	/* pcre_compile() obtains its memory through pcre_malloc, so the block
	   must be handed back through pcre_free rather than a direct free(). */
	if (THIS->code)
		pcre_free(THIS->code);

	THIS->code = pcre_compile(THIS->pattern, THIS->copts, &errstr, &errptr, NULL);

	if (!THIS->code)
	{
		THIS->error = errptr;
		GB.Error(errstr);
	}
}
/*
 * Run the compiled pattern against THIS->subject and record the outcome.
 *
 * On a match, THIS->count receives pcre_exec()'s positive return value and
 * THIS->error is cleared. On any failure THIS->error receives the negative
 * PCRE code and THIS->count is reset to 0, so that offsets left in
 * THIS->ovector by an earlier match are never applied to the current subject.
 * PCRE_ERROR_NOMATCH is not raised as a Gambas error; every other failure is.
 */
static void exec(void *_object)
{
	int ret;
	int length;
	char code[16];

	if (!THIS->code)
	{
		GB.Error("No pattern compiled yet");
		return;
	}

	if (!THIS->subject)
	{
		GB.Error("No subject provided");
		return;
	}

	/* The subject does not change while we retry, so measure it once. */
	length = GB.StringLength(THIS->subject);

	for(;;)
	{
		ret = pcre_exec(THIS->code,
			NULL,
			THIS->subject,
			length,
			0,
			THIS->eopts,
			THIS->ovector,
			THIS->ovecsize);

		if (ret != 0)
			break;

		/* pcre_exec() returns 0 when the output vector is too small to hold
		   every captured offset: enlarge it and run the match again. */
		THIS->ovecsize += OVECSIZE_INC;
		GB.Realloc(POINTER(&THIS->ovector), THIS->ovecsize * sizeof(int));
	}

	if (ret > 0)
	{
		THIS->error = 0;
		THIS->count = ret;
		return;
	}

	/* Failure: forget any previous match before reporting the error. */
	THIS->error = ret;
	THIS->count = 0;

	switch (ret)
	{
		case PCRE_ERROR_NOMATCH:
			break;

		case PCRE_ERROR_NULL:
			GB.Error("Pattern or subject is null");
			break;

		case PCRE_ERROR_BADOPTION:
			GB.Error("Unknown option");
			break;

		case PCRE_ERROR_BADMAGIC:
		case PCRE_ERROR_UNKNOWN_OPCODE:
			GB.Error("Incorrect PCRE bytecode");
			break;

		case PCRE_ERROR_NOMEMORY:
			GB.Error("Out of memory");
			break;

		case PCRE_ERROR_BADUTF8:
#ifdef PCRE_ERROR_SHORTUTF8
		case PCRE_ERROR_SHORTUTF8:
#endif
			GB.Error("Bad UTF-8 string");
			break;

#ifdef PCRE_ERROR_BADUTF8_OFFSET
		case PCRE_ERROR_BADUTF8_OFFSET:
			GB.Error("Bad UTF-8 offset");
			break;
#endif

		case PCRE_ERROR_INTERNAL:
			GB.Error("Unexpected internal error");
			break;

		case PCRE_ERROR_BADNEWLINE:
			GB.Error("Invalid combination of newline options");
			break;

		//case PCRE_ERROR_RECURSELOOP:
		//	GB.Error("Recursion loop detected"); break;
		//case PCRE_ERROR_JIT_STACKLIMIT:
		//	GB.Error("JIT stack limit reached"); break;

		default:
			/* Unlisted codes are reported by their (positive) number. */
			snprintf(code, sizeof(code), "%d", -ret);
			GB.Error("Unable to exec regular expression: error #&1", code);
			break;
	}
}
/*
 * Return to Gambas the text captured by match number 'index' (0 is the whole
 * match, as used by the Text property).
 *
 * Raises "Out of bounds" when index is outside [0, THIS->count). A group that
 * did not take part in the match has negative offsets in THIS->ovector; in
 * that case a void string is returned instead of building a pointer in front
 * of the subject.
 */
static void return_match(void *_object, int index)
{
	int start;
	int end;

	if (index < 0 || index >= THIS->count)
	{
		GB.Error("Out of bounds");
		return;
	}

	/* Each match owns two consecutive slots: start offset, then end offset. */
	start = THIS->ovector[2 * index];
	end = THIS->ovector[2 * index + 1];

	/* NOTE(review): assumes GB.ReturnNewString() with a non-positive length
	   also produced a void string, so this only removes the invalid pointer
	   arithmetic -- confirm against the Gambas API. */
	if (start < 0 || end < start)
	{
		GB.ReturnVoidString();
		return;
	}

	GB.ReturnNewString(&THIS->subject[start], end - start);
}
/*
 * Regexp.Compile(Pattern [, CompileOptions])
 * Replaces the stored pattern and compile options, then compiles the pattern.
 */
BEGIN_METHOD(RegExp_Compile, GB_STRING pattern; GB_INTEGER coptions)

	/* Release the old pattern string before storing a copy of the new one. */
	GB.FreeString(&THIS->pattern);
	THIS->pattern = GB.NewString(STRING(pattern), LENGTH(pattern));

	/* Compile options default to 0 when the argument is omitted. */
	THIS->copts = VARGOPT(coptions, 0);

	compile(THIS);

END_METHOD
/*
 * Regexp.Exec(Subject [, ExecOptions])
 * Replaces the stored subject and exec options, then runs the compiled
 * pattern against the subject.
 */
BEGIN_METHOD(RegExp_Exec, GB_STRING subject; GB_INTEGER eoptions)

	/* Release the old subject string before storing a copy of the new one. */
	GB.FreeString(&THIS->subject);
	THIS->subject = GB.NewString(STRING(subject), LENGTH(subject));

	/* Exec options default to 0 when the argument is omitted. */
	THIS->eopts = VARGOPT(eoptions, 0);

	exec(THIS);

END_METHOD
/*
 * Regexp constructor: _new([Subject, Pattern, CompileOptions, ExecOptions])
 *
 * Always allocates the offset vector. If a pattern is supplied it is compiled;
 * if compilation succeeded and a subject is supplied as well, the match is
 * executed right away.
 */
BEGIN_METHOD(RegExp_new, GB_STRING subject; GB_STRING pattern; GB_INTEGER coptions; GB_INTEGER eoptions)

	THIS->ovecsize = OVECSIZE_INC;
	GB.Alloc(POINTER(&THIS->ovector), sizeof(int) * THIS->ovecsize);

	/* Without a pattern there is nothing more to set up. */
	if (!MISSING(pattern))
	{
		THIS->code = NULL;
		THIS->copts = VARGOPT(coptions, 0);
		THIS->pattern = GB.NewString(STRING(pattern), LENGTH(pattern));

		compile(THIS);

		/* Only run the match when we got a compiled pattern back and the
		   user actually specified some subject text. */
		if (THIS->code && !MISSING(subject))
		{
			THIS->eopts = VARGOPT(eoptions, 0);
			THIS->subject = GB.NewString(STRING(subject), LENGTH(subject));

			exec(THIS);
		}
	}

END_METHOD
/*
 * Regexp destructor: releases the compiled pattern, the subject and pattern
 * strings, and the offset vector.
 */
BEGIN_METHOD_VOID(RegExp_free)

	/* The compiled pattern was allocated by pcre_compile() through
	   pcre_malloc, so it is released through pcre_free, not free(). */
	if (THIS->code)
		pcre_free(THIS->code);

	GB.FreeString(&THIS->subject);
	GB.FreeString(&THIS->pattern);
	GB.Free(POINTER(&THIS->ovector));

END_METHOD
/*
 * Static Regexp.Match(Subject, Pattern [, CompileOptions, ExecOptions])
 *
 * Compiles Pattern and runs it against Subject using a temporary CREGEXP that
 * lives on the stack, then returns whether the pattern matched. If the
 * pattern fails to compile, FALSE is returned.
 */
BEGIN_METHOD(RegExp_Match, GB_STRING subject; GB_STRING pattern; GB_INTEGER coptions; GB_INTEGER eoptions)

	/*
	 * The gb.pcre internal routines don't require the GB_BASE to be
	 * initialised by Gambas!
	 */
	CREGEXP tmp;
	bool ret = FALSE;

	CLEAR(&tmp);
	tmp.ovecsize = OVECSIZE_INC;
	GB.Alloc(POINTER(&tmp.ovector), sizeof(int) * tmp.ovecsize);

	tmp.copts = VARGOPT(coptions, 0);
	tmp.pattern = GB.NewString(STRING(pattern), LENGTH(pattern));
	compile(&tmp);

	if (tmp.code)
	{
		tmp.eopts = VARGOPT(eoptions, 0);
		tmp.subject = GB.NewString(STRING(subject), LENGTH(subject));
		exec(&tmp);

		/* exec() stores a positive count only when pcre_exec() reported a
		   match. The offset vector is freshly allocated and not initialised
		   here, so its contents cannot be used to detect a failed match. */
		ret = (tmp.count > 0);
	}

	RegExp_free(&tmp, NULL);
	GB.ReturnBoolean(ret);

END_METHOD
/* Regexp.Pattern (read-only): the pattern string currently stored in the object. */
BEGIN_PROPERTY(RegExp_Pattern)

	GB.ReturnString(THIS->pattern);

END_PROPERTY
/* Regexp.Subject (read-only): the subject string currently stored in the object. */
BEGIN_PROPERTY(RegExp_Subject)

	GB.ReturnString(THIS->subject);

END_PROPERTY
BEGIN_PROPERTY(RegExp_Offset)
GB.ReturnInteger(THIS->ovector[0]);
END_PROPERTY
/*
 * Regexp.Text (read-only): the text matched by the entire pattern, or a void
 * string when the match count is zero.
 */
BEGIN_PROPERTY(RegExp_Text)

	if (THIS->count != 0)
		return_match(THIS, 0);
	else
		GB.ReturnVoidString();

END_PROPERTY
BEGIN_PROPERTY(RegExp_Error)
GB.ReturnInteger(THIS->error);
END_PROPERTY
BEGIN_PROPERTY(RegExp_Submatches_Count)
GB.ReturnInteger(THIS->count - 1);
END_PROPERTY
BEGIN_METHOD(RegExp_Submatches_get, GB_INTEGER index)
int index = VARG(index);
if (index < 0 || index >= THIS->count)
{
GB.Error("Out of bounds");
return;
}
THIS->_submatch = index;
RETURN_SELF();
END_METHOD
BEGIN_PROPERTY(RegExp_Submatch_Text)
return_match(THIS, THIS->_submatch);
END_PROPERTY
BEGIN_PROPERTY(RegExp_Submatch_Offset)
GB.ReturnInteger(THIS->ovector[2 * THIS->_submatch]);
END_PROPERTY
/*
 * Gambas class description of "Regexp": constructor/destructor, the Compile,
 * Exec and static Match methods, the PCRE option and error constants, and the
 * read-only properties exposing the state of the last operation.
 */
GB_DESC CRegexpDesc[] =
{
GB_DECLARE("Regexp", sizeof(CREGEXP)),
/* Construction / destruction and matching methods */
GB_METHOD("_new", NULL, RegExp_new, "[(Subject)s(Pattern)s(CompileOptions)i(ExecOptions)i]"),
GB_METHOD("_free", NULL, RegExp_free, NULL),
GB_METHOD("Compile", NULL, RegExp_Compile, "(Pattern)s[(CompileOptions)i]"),
GB_METHOD("Exec", NULL, RegExp_Exec, "(Subject)s[(ExecOptions)i]"),
GB_STATIC_METHOD("Match", "b", RegExp_Match, "(Subject)s(Pattern)s[(CompileOptions)i(ExecOptions)i]"),
/* PCRE option flags, to be passed as CompileOptions / ExecOptions */
GB_CONSTANT("Caseless", "i", PCRE_CASELESS),
GB_CONSTANT("MultiLine", "i", PCRE_MULTILINE),
GB_CONSTANT("DotAll", "i", PCRE_DOTALL),
GB_CONSTANT("Extended", "i", PCRE_EXTENDED),
GB_CONSTANT("Anchored", "i", PCRE_ANCHORED),
GB_CONSTANT("DollarEndOnly", "i", PCRE_DOLLAR_ENDONLY),
GB_CONSTANT("Extra", "i", PCRE_EXTRA),
GB_CONSTANT("NotBOL", "i", PCRE_NOTBOL),
GB_CONSTANT("NotEOL", "i", PCRE_NOTEOL),
GB_CONSTANT("Ungreedy", "i", PCRE_UNGREEDY),
GB_CONSTANT("NotEmpty", "i", PCRE_NOTEMPTY),
GB_CONSTANT("UTF8", "i", PCRE_UTF8),
GB_CONSTANT("NoAutoCapture", "i", PCRE_NO_AUTO_CAPTURE),
GB_CONSTANT("NoUTF8Check", "i", PCRE_NO_UTF8_CHECK),
/* PCRE error codes; exec() stores pcre_exec()'s negative return value in the Error property */
GB_CONSTANT("NoMatch", "i", PCRE_ERROR_NOMATCH),
GB_CONSTANT("Null", "i", PCRE_ERROR_NULL),
GB_CONSTANT("BadOption", "i", PCRE_ERROR_BADOPTION),
GB_CONSTANT("BadMagic", "i", PCRE_ERROR_BADMAGIC),
GB_CONSTANT("UnknownNode", "i", PCRE_ERROR_UNKNOWN_NODE),
GB_CONSTANT("NoMemory", "i", PCRE_ERROR_NOMEMORY),
GB_CONSTANT("NoSubstring", "i", PCRE_ERROR_NOSUBSTRING),
GB_CONSTANT("MatchLimit", "i", PCRE_ERROR_MATCHLIMIT),
GB_CONSTANT("Callout", "i", PCRE_ERROR_CALLOUT),
GB_CONSTANT("BadUTF8", "i", PCRE_ERROR_BADUTF8),
/* Older PCRE headers lack PCRE_ERROR_BADUTF8_OFFSET: export a placeholder value so the constant still exists */
#if (((PCRE_MAJOR == 4) && (PCRE_MINOR < 5)) || (PCRE_MAJOR < 4))
GB_CONSTANT("BadUTF8Offset", "i", 65535), /* PCRE_ERROR_BADUTF8_OFFSET not defined < 4.5 */
#else
GB_CONSTANT("BadUTF8Offset", "i", PCRE_ERROR_BADUTF8_OFFSET),
#endif
/* Results of the last Exec and current object state */
GB_PROPERTY_SELF("SubMatches", ".Regexp.Submatches"),
GB_PROPERTY_READ("Text", "s", RegExp_Text), /* this is the string matched by the entire pattern */
GB_PROPERTY_READ("Offset", "i", RegExp_Offset), /* this is the offset in the subject of the string matched by the entire pattern */
GB_PROPERTY_READ("Pattern", "s", RegExp_Pattern),
GB_PROPERTY_READ("Subject", "s", RegExp_Subject),
GB_PROPERTY_READ("Error", "i", RegExp_Error),
GB_END_DECLARE
};
/*
 * Virtual class ".Regexp.Submatches", reached through Regexp.SubMatches:
 * indexing it yields a ".Regexp.Submatch", and Count reports the number of
 * submatches.
 */
GB_DESC CRegexpSubmatchesDesc[] =
{
GB_DECLARE(".Regexp.Submatches", 0), GB_VIRTUAL_CLASS(),
GB_METHOD("_get", ".Regexp.Submatch", RegExp_Submatches_get, "(Index)i"),
GB_PROPERTY_READ("Count", "i", RegExp_Submatches_Count),
GB_END_DECLARE
};
/*
 * Virtual class ".Regexp.Submatch", the element type returned by indexing
 * ".Regexp.Submatches": exposes the offset and the text of the selected
 * submatch.
 */
GB_DESC CRegexpSubmatchDesc[] =
{
GB_DECLARE(".Regexp.Submatch", 0), GB_VIRTUAL_CLASS(),
GB_PROPERTY_READ("Offset", "i", RegExp_Submatch_Offset),
GB_PROPERTY_READ("Text", "s", RegExp_Submatch_Text),
GB_END_DECLARE
};