/*************************************************************************** regexp.c (c) 2004 Rob Kudla (c) 2000-2012 BenoƮt Minisini This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. ***************************************************************************/ #define __REGEXP_C #include "gb_common.h" #include "regexp.h" #include "main.h" #define OVECSIZE_INC 99 /*************************************************************************** Regexp ***************************************************************************/ static void compile(void *_object) { int errptr; const char *errstr; if (!THIS->pattern) { GB.Error("No pattern provided"); return; } if (THIS->code) free(THIS->code); THIS->code = pcre_compile(THIS->pattern, THIS->copts, &errstr, &errptr, NULL); if (!THIS->code) { THIS->error = errptr; GB.Error(errstr); } } static void exec(void *_object) { int ret; char code[8]; if (!THIS->code) { GB.Error("No pattern compiled yet"); return; } if (!THIS->subject) { GB.Error("No subject provided"); return; } for(;;) { ret = pcre_exec(THIS->code, NULL, THIS->subject, GB.StringLength(THIS->subject), 0, THIS->eopts, THIS->ovector, THIS->ovecsize); if (ret > 0) { THIS->error = 0; THIS->count = ret; break; } else if (ret < 0) { THIS->error = ret; switch (ret) { case PCRE_ERROR_NOMATCH: THIS->count = 0; return; case PCRE_ERROR_NULL: GB.Error("Pattern or subject is null"); return; case PCRE_ERROR_BADOPTION: GB.Error("Unknown option"); return; case PCRE_ERROR_BADMAGIC: case PCRE_ERROR_UNKNOWN_OPCODE: GB.Error("Incorrect PCRE bytecode"); return; case PCRE_ERROR_NOMEMORY: GB.Error("Out of memory"); return; case PCRE_ERROR_BADUTF8: #ifdef PCRE_ERROR_SHORTUTF8 case PCRE_ERROR_SHORTUTF8: #endif GB.Error("Bad UTF-8 string"); return; #ifdef PCRE_ERROR_BADUTF8_OFFSET case PCRE_ERROR_BADUTF8_OFFSET: GB.Error("Bad UTF-8 offset"); return; #endif case PCRE_ERROR_INTERNAL: GB.Error("Unexpected internal error"); return; case PCRE_ERROR_BADNEWLINE: GB.Error("Invalid combination of newline options"); return; //case PCRE_ERROR_RECURSELOOP: // GB.Error("Recursion loop detected"); return; //case PCRE_ERROR_JIT_STACKLIMIT: // GB.Error("JIT stack limit reached"); return; default: sprintf(code, "%d", -ret); GB.Error("Unable to exec regular expression: error #&1", code); return; } } THIS->ovecsize += OVECSIZE_INC; GB.Realloc(POINTER(&THIS->ovector), THIS->ovecsize * sizeof(int)); } } static void return_match(void *_object, int index) { if (index < 0 || index >= THIS->count) { GB.Error("Out of bounds"); return; } index *= 2; GB.ReturnNewString(&THIS->subject[THIS->ovector[index]], THIS->ovector[index + 1] - THIS->ovector[index]); } BEGIN_METHOD(RegExp_Compile, GB_STRING pattern; GB_INTEGER coptions) THIS->copts = VARGOPT(coptions, 0); GB.FreeString(&THIS->pattern); THIS->pattern = GB.NewString(STRING(pattern), LENGTH(pattern)); compile(THIS); END_METHOD BEGIN_METHOD(RegExp_Exec, GB_STRING subject; GB_INTEGER eoptions) THIS->eopts = VARGOPT(eoptions, 0); GB.FreeString(&THIS->subject); THIS->subject = GB.NewString(STRING(subject), LENGTH(subject)); exec(THIS); END_METHOD BEGIN_METHOD(RegExp_new, GB_STRING subject; GB_STRING pattern; GB_INTEGER coptions; GB_INTEGER eoptions) THIS->ovecsize = OVECSIZE_INC; GB.Alloc(POINTER(&THIS->ovector), sizeof(int) * THIS->ovecsize); if (MISSING(pattern)) // the user didn't provide a pattern. return; THIS->copts = VARGOPT(coptions, 0); THIS->pattern = GB.NewString(STRING(pattern), LENGTH(pattern)); THIS->code = NULL; compile(THIS); if (!THIS->code) // we didn't get a compiled pattern back. return; if (MISSING(subject)) // the user didn't specify any subject text. return; THIS->eopts = VARGOPT(eoptions, 0); THIS->subject = GB.NewString(STRING(subject), LENGTH(subject)); exec(THIS); END_METHOD BEGIN_METHOD_VOID(RegExp_free) if (THIS->code) free(THIS->code); GB.FreeString(&THIS->subject); GB.FreeString(&THIS->pattern); GB.Free(POINTER(&THIS->ovector)); END_METHOD BEGIN_METHOD(RegExp_Match, GB_STRING subject; GB_STRING pattern; GB_INTEGER coptions; GB_INTEGER eoptions) /* * The gb.pcre internal routines don't require the GB_BASE to be * initialised by Gambas! */ CREGEXP tmp; bool ret = FALSE; CLEAR(&tmp); tmp.ovecsize = OVECSIZE_INC; GB.Alloc(POINTER(&tmp.ovector), sizeof(int) * tmp.ovecsize); tmp.copts = VARGOPT(coptions, 0); tmp.pattern = GB.NewString(STRING(pattern), LENGTH(pattern)); compile(&tmp); if (tmp.code) { tmp.eopts = VARGOPT(eoptions, 0); tmp.subject = GB.NewString(STRING(subject), LENGTH(subject)); exec(&tmp); ret = (tmp.ovector[0] != -1); } RegExp_free(&tmp, NULL); GB.ReturnBoolean(ret); END_METHOD BEGIN_PROPERTY(RegExp_Pattern) GB.ReturnString(THIS->pattern); END_PROPERTY BEGIN_PROPERTY(RegExp_Subject) GB.ReturnString(THIS->subject); END_PROPERTY BEGIN_PROPERTY(RegExp_Offset) GB.ReturnInteger(THIS->ovector[0]); END_PROPERTY BEGIN_PROPERTY(RegExp_Text) if (THIS->count == 0) GB.ReturnVoidString(); else return_match(THIS, 0); END_PROPERTY BEGIN_PROPERTY(RegExp_Error) GB.ReturnInteger(THIS->error); END_PROPERTY BEGIN_PROPERTY(RegExp_Submatches_Count) GB.ReturnInteger(THIS->count - 1); END_PROPERTY BEGIN_METHOD(RegExp_Submatches_get, GB_INTEGER index) int index = VARG(index); if (index < 0 || index >= THIS->count) { GB.Error("Out of bounds"); return; } THIS->_submatch = index; RETURN_SELF(); END_METHOD BEGIN_PROPERTY(RegExp_Submatch_Text) return_match(THIS, THIS->_submatch); END_PROPERTY BEGIN_PROPERTY(RegExp_Submatch_Offset) GB.ReturnInteger(THIS->ovector[2 * THIS->_submatch]); END_PROPERTY GB_DESC CRegexpDesc[] = { GB_DECLARE("Regexp", sizeof(CREGEXP)), GB_METHOD("_new", NULL, RegExp_new, "[(Subject)s(Pattern)s(CompileOptions)i(ExecOptions)i]"), GB_METHOD("_free", NULL, RegExp_free, NULL), GB_METHOD("Compile", NULL, RegExp_Compile, "(Pattern)s[(CompileOptions)i]"), GB_METHOD("Exec", NULL, RegExp_Exec, "(Subject)s[(ExecOptions)i]"), GB_STATIC_METHOD("Match", "b", RegExp_Match, "(Subject)s(Pattern)s[(CompileOptions)i(ExecOptions)i]"), GB_CONSTANT("Caseless", "i", PCRE_CASELESS), GB_CONSTANT("MultiLine", "i", PCRE_MULTILINE), GB_CONSTANT("DotAll", "i", PCRE_DOTALL), GB_CONSTANT("Extended", "i", PCRE_EXTENDED), GB_CONSTANT("Anchored", "i", PCRE_ANCHORED), GB_CONSTANT("DollarEndOnly", "i", PCRE_DOLLAR_ENDONLY), GB_CONSTANT("Extra", "i", PCRE_EXTRA), GB_CONSTANT("NotBOL", "i", PCRE_NOTBOL), GB_CONSTANT("NotEOL", "i", PCRE_NOTEOL), GB_CONSTANT("Ungreedy", "i", PCRE_UNGREEDY), GB_CONSTANT("NotEmpty", "i", PCRE_NOTEMPTY), GB_CONSTANT("UTF8", "i", PCRE_UTF8), GB_CONSTANT("NoAutoCapture", "i", PCRE_NO_AUTO_CAPTURE), GB_CONSTANT("NoUTF8Check", "i", PCRE_NO_UTF8_CHECK), GB_CONSTANT("NoMatch", "i", PCRE_ERROR_NOMATCH), GB_CONSTANT("Null", "i", PCRE_ERROR_NULL), GB_CONSTANT("BadOption", "i", PCRE_ERROR_BADOPTION), GB_CONSTANT("BadMagic", "i", PCRE_ERROR_BADMAGIC), GB_CONSTANT("UnknownNode", "i", PCRE_ERROR_UNKNOWN_NODE), GB_CONSTANT("NoMemory", "i", PCRE_ERROR_NOMEMORY), GB_CONSTANT("NoSubstring", "i", PCRE_ERROR_NOSUBSTRING), GB_CONSTANT("MatchLimit", "i", PCRE_ERROR_MATCHLIMIT), GB_CONSTANT("Callout", "i", PCRE_ERROR_CALLOUT), GB_CONSTANT("BadUTF8", "i", PCRE_ERROR_BADUTF8), #if (((PCRE_MAJOR == 4) && (PCRE_MINOR < 5)) || (PCRE_MAJOR < 4)) GB_CONSTANT("BadUTF8Offset", "i", 65535), /* PCRE_ERROR_BADUTF8_OFFSET not defined < 4.5 */ #else GB_CONSTANT("BadUTF8Offset", "i", PCRE_ERROR_BADUTF8_OFFSET), #endif GB_PROPERTY_SELF("SubMatches", ".Regexp.Submatches"), GB_PROPERTY_READ("Text", "s", RegExp_Text), /* this is the string matched by the entire pattern */ GB_PROPERTY_READ("Offset", "i", RegExp_Offset), /* this is the string matched by the entire pattern */ GB_PROPERTY_READ("Pattern", "s", RegExp_Pattern), GB_PROPERTY_READ("Subject", "s", RegExp_Subject), GB_PROPERTY_READ("Error", "i", RegExp_Error), GB_END_DECLARE }; GB_DESC CRegexpSubmatchesDesc[] = { GB_DECLARE(".Regexp.Submatches", 0), GB_VIRTUAL_CLASS(), GB_METHOD("_get", ".Regexp.Submatch", RegExp_Submatches_get, "(Index)i"), GB_PROPERTY_READ("Count", "i", RegExp_Submatches_Count), GB_END_DECLARE }; GB_DESC CRegexpSubmatchDesc[] = { GB_DECLARE(".Regexp.Submatch", 0), GB_VIRTUAL_CLASS(), GB_PROPERTY_READ("Offset", "i", RegExp_Submatch_Offset), GB_PROPERTY_READ("Text", "s", RegExp_Submatch_Text), GB_END_DECLARE };