gambas-source-code/main/gbx/gbx_subr_string.c
Benoît Minisini ed62e2dc78 [DEVELOPMENT ENVIRONMENT]
* BUG: Store autoconf extra tests in the .project file, not in its own 
  hidden file anymore.
* NEW: Add KDE4 as possible icon theme.
* BUG: The packager wizard logic has been fixed.

[INTERPRETER]
* NEW: The Array class has now almost all standard array methods. They all 
  use Variant as array contents datatype, internally converting values to
  the real array datatype.
* NEW: Quote$() now keeps characters whose code is greater than 126 unquoted.

[COMPILER]
* NEW: '\b' and '\f' are two new escaped sequences in strings.
* BUG: Move some debugging breakpoint instructions so that the debugger 
  breaks at the right line when there is an error during an IF or a CASE 
  instruction.

[GB.EVAL]
* NEW: '\b' and '\f' are two new escaped sequences in strings.

[GB.FORM]
* NEW: Some new stock icons.

[GB.QT.EXT]
* NEW: Editor.Lines[].GetInitialState() fills the Highlight properties
  (State, Tag and AlternateState) with the initial state of a line.

[GB.WEB]
* NEW: JSON is a new class that can encode and decode the JSON format.


git-svn-id: svn://localhost/gambas/trunk@1925 867c0c6c-44f3-4631-809d-bfa615b0a4ec
2009-04-08 10:11:16 +00:00

1087 lines
17 KiB
C

/***************************************************************************
subr_string.c
The String management subroutines
(c) 2000-2007 Benoit Minisini <gambas@users.sourceforge.net>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 1, or (at your option)
any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
***************************************************************************/
#include "gb_common.h"
#include "gb_common_buffer.h"
#include "gb_common_case.h"
#include <ctype.h>
#include <limits.h>
#include <regex.h>
#include "gb_pcode.h"
#include "gbx_value.h"
#include "gbx_subr.h"
#include "gbx_regexp.h"
#include "gbx_class.h"
#include "gbx_string.h"
#include "gbx_c_array.h"
#include "gbx_local.h"
#include "gbx_compare.h"
/* Converts every parameter to a string and returns their concatenation
   as one new temporary string. */
void SUBR_cat(void)
{
	int n;
	int part;
	int total = 0;
	char *result;
	char *out;

	SUBR_ENTER();

	/* First pass: convert each argument and add up the lengths. */
	for (n = 0; n < NPARAM; n++)
	{
		VALUE_conv_string(&PARAM[n]);
		total += PARAM[n]._string.len;
	}

	STRING_new_temp(&result, NULL, total);

	/* Second pass: copy the non-empty pieces one after the other. */
	out = result;
	for (n = 0; n < NPARAM; n++)
	{
		part = PARAM[n]._string.len;
		if (part <= 0)
			continue;

		memcpy(out, PARAM[n]._string.addr + PARAM[n]._string.start, part);
		out += part;
	}

	RETURN->type = T_STRING;
	RETURN->_string.addr = result;
	RETURN->_string.start = 0;
	RETURN->_string.len = total;

	SUBR_LEAVE();
}
/* Joins the parameters into a single path. A '/' is inserted between two
   non-empty parts when the left part does not end with one and the right
   part does not start with one. */
void SUBR_file(void)
{
	int n;
	int total = 0;
	char *part;
	int part_len;
	char *path, *out;
	boolean ends_with_slash = FALSE;

	SUBR_ENTER();

	/* Pass 1: compute the length of the result, separators included. */
	for (n = 0; n < NPARAM; n++)
	{
		SUBR_get_string_len(&PARAM[n], &part, &part_len);
		if (part_len <= 0)
			continue;

		if (total > 0 && !ends_with_slash && part[0] != '/')
			total++;

		ends_with_slash = (part[part_len - 1] == '/');
		total += part_len;
	}

	STRING_new_temp(&path, NULL, total);

	/* Pass 2: copy the parts, adding the separators counted above. */
	out = path;
	for (n = 0; n < NPARAM; n++)
	{
		VALUE_get_string(&PARAM[n], &part, &part_len);
		if (part_len <= 0)
			continue;

		if (out > path && out[-1] != '/' && part[0] != '/')
			*out++ = '/';

		memcpy(out, part, part_len);
		out += part_len;
	}

	RETURN->type = T_STRING;
	RETURN->_string.addr = path;
	RETURN->_string.start = 0;
	RETURN->_string.len = total;

	SUBR_LEAVE();
}
#if 0
/* Left part of a string, computed in place on the first parameter.
   This function sits inside an "#if 0" region and is not compiled.
   The count defaults to 1 when only one parameter is given; a negative
   count has the string length added to it before being bounded. */
void SUBR_left(void)
{
int val;
SUBR_ENTER();
/* Nothing to cut when the string check reports true. */
if (SUBR_check_string(PARAM))
goto _FIN;
if (NPARAM == 1)
val = 1;
else
{
VALUE_conv(&PARAM[1], T_INTEGER);
val = PARAM[1]._integer.value;
}
if (val < 0)
val += PARAM->_string.len;
/* NOTE(review): MinMax presumably clamps val into [0, len] - confirm. */
PARAM->_string.len = MinMax(val, 0, PARAM->_string.len);
_FIN:
/* Drop the parameters but keep the first slot, which holds the result. */
SP -= NPARAM;
SP++;
}
/* Right part of a string, computed in place on the first parameter.
   This function sits inside an "#if 0" region and is not compiled.
   The count defaults to 1 when only one parameter is given; a negative
   count has the string length added to it before being bounded. */
void SUBR_right(void)
{
int val;
int new_len;
SUBR_ENTER();
/* Nothing to cut when the string check reports true. */
if (SUBR_check_string(PARAM))
goto _FIN;
if (NPARAM == 1)
val = 1;
else
{
VALUE_conv(&PARAM[1], T_INTEGER);
val = PARAM[1]._integer.value;
}
if (val < 0)
val += PARAM->_string.len;
/* NOTE(review): MinMax presumably clamps val into [0, len] - confirm. */
new_len = MinMax(val, 0, PARAM->_string.len);
/* Move the start forward so that only the last new_len bytes remain. */
PARAM->_string.start += PARAM->_string.len - new_len;
PARAM->_string.len = new_len;
_FIN:
/* Drop the parameters but keep the first slot, which holds the result. */
SP -= NPARAM;
SP++;
}
/* Middle part of a string, computed in place on the first parameter.
   This function sits inside an "#if 0" region and is not compiled.
   PARAM[1] is the 1-based start position; the optional PARAM[2] is the
   length, where a negative value is taken relative to the end of the string. */
void SUBR_mid(void)
{
int start;
int len;
SUBR_ENTER();
if (SUBR_check_string(PARAM))
goto FIN;
VALUE_conv(&PARAM[1], T_INTEGER);
/* Convert the 1-based position into a 0-based offset. */
start = PARAM[1]._integer.value - 1;
if (start < 0)
THROW(E_ARG);
/* Starting at or past the end gives a void string. */
if (start >= PARAM->_string.len)
{
RELEASE(PARAM);
STRING_void_value(PARAM);
goto FIN;
}
/* Without an explicit length, take everything (bounded below). */
if (NPARAM == 2)
len = PARAM->_string.len;
else
{
VALUE_conv(&PARAM[2], T_INTEGER);
len = PARAM[2]._integer.value;
}
/* A negative length removes that many characters from the end. */
if (len < 0)
len = Max(0, PARAM->_string.len - start + len);
/* NOTE(review): MinMax presumably clamps len into [0, len - start] - confirm. */
len = MinMax(len, 0, PARAM->_string.len - start);
if (len == 0)
{
RELEASE(PARAM);
PARAM->_string.addr = NULL;
PARAM->_string.start = 0;
}
else
PARAM->_string.start += start;
PARAM->_string.len = len;
FIN:
/* Drop the parameters but keep the first slot, which holds the result. */
SP -= NPARAM;
SP++;
}
/* Length of a string: replaces the parameter with an integer value.
   This function sits inside an "#if 0" region and is not compiled.
   The length is 0 when the string check reports true. */
void SUBR_len(void)
{
int len;
SUBR_GET_PARAM(1);
if (SUBR_check_string(PARAM))
len = 0;
else
len = PARAM->_string.len;
/* Release the string before overwriting the slot with the integer. */
RELEASE(PARAM);
PARAM->type = T_INTEGER;
PARAM->_integer.value = len;
}
#endif
/* Returns a string made of the requested number of space characters.
   A negative count raises E_ARG; a count of zero gives the void string. */
void SUBR_space(void)
{
	int count;

	SUBR_ENTER_PARAM(1);

	SUBR_check_integer(PARAM);
	count = PARAM->_integer.value;

	if (count < 0)
	{
		THROW(E_ARG);
	}

	if (count > 0)
	{
		STRING_new_temp_value(RETURN, NULL, count);
		memset(RETURN->_string.addr, ' ', count);
	}
	else
	{
		STRING_void_value(RETURN);
	}

	SUBR_LEAVE();
}
/* Returns the string PARAM[1] repeated PARAM[0] times.

   Raises E_ARG when the count is negative (and the pattern is not empty)
   or when the total length would not fit in an int. An empty result gives
   the void string. */
void SUBR_string(void)
{
	int i;
	int count;
	char *d;
	char *s;
	int ld, ls;

	SUBR_ENTER_PARAM(2);

	SUBR_check_integer(PARAM);
	SUBR_get_string_len(&PARAM[1], &s, &ls);
	count = PARAM->_integer.value;

	if (ls > 0)
	{
		/* Check the range before multiplying: overflowing a signed int is
		   undefined behaviour and could lead to an undersized buffer. */
		if (count < 0 || count > INT_MAX / ls)
		{
			THROW(E_ARG);
		}
		ld = count * ls;
	}
	else
	{
		/* Repeating an empty pattern always gives an empty result. */
		ld = 0;
	}

	if (ld == 0)
	{
		STRING_void_value(RETURN);
	}
	else
	{
		STRING_new_temp_value(RETURN, NULL, ld);
		d = RETURN->_string.addr;

		for (i = 0; i < count; i++)
		{
			memcpy(d, s, ls);
			d += ls;
		}

		/* Terminate the result, as the previous implementation did. */
		*d = 0;
	}

	SUBR_LEAVE();
}
/* Trims, in place, the bytes whose code is <= ' ' from the parameter string.
   The low five bits of EXEC_code select the sides: 0 trims both, 1 trims
   the left side only, 2 trims the right side only. */
void SUBR_trim(void)
{
	unsigned char *p;
	int mode;
	int n;

	SUBR_GET_PARAM(1);

	if (SUBR_check_string(PARAM))
		return;

	mode = EXEC_code & 0x1F;

	n = PARAM->_string.len;
	if (n <= 0)
		return;

	p = (uchar *)&PARAM->_string.addr[PARAM->_string.start];

	if (mode == 0 || mode == 1)
	{
		/* Advance the start of the string past the leading blanks. */
		for (; n > 0 && *p <= ' '; p++, n--)
			PARAM->_string.start++;
	}

	if (mode == 0 || mode == 2)
	{
		/* Shorten the string while its last byte is a blank. */
		while (n > 0 && p[n - 1] <= ' ')
			n--;
	}

	PARAM->_string.len = n;
}
/* Whole body of a one-parameter subroutine that returns a copy of its
   string argument with _func applied to every byte.

   Each byte is converted to unsigned char before being handed to _func,
   because the <ctype.h> functions are undefined for negative arguments.
   A string that fails the check, or whose length is not positive, yields
   the void string, so RETURN is set on every path. */
#define STRING_APPLY(_func) \
	char *str; \
	int len, i; \
	\
	SUBR_ENTER_PARAM(1); \
	\
	if (SUBR_check_string(PARAM) || PARAM->_string.len <= 0) \
		STRING_void_value(RETURN); \
	else \
	{ \
		len = PARAM->_string.len; \
		STRING_new_temp(&str, &PARAM->_string.addr[PARAM->_string.start], len); \
		\
		for (i = 0; i < len; i++) \
			str[i] = (char)_func((unsigned char)str[i]); \
		\
		RETURN->type = T_STRING; \
		RETURN->_string.addr = str; \
		RETURN->_string.start = 0; \
		RETURN->_string.len = len; \
	} \
	\
	SUBR_LEAVE();

/* Returns the parameter string with toupper() applied to every byte. */
void SUBR_upper(void)
{
	STRING_APPLY(toupper);
}

/* Returns the parameter string with tolower() applied to every byte. */
void SUBR_lower(void)
{
	STRING_APPLY(tolower);
}
/* Replaces the integer parameter with the one-byte string having that
   character code. Codes outside 0..255 raise E_ARG. */
void SUBR_chr(void)
{
	int value;

	SUBR_GET_PARAM(1);

	VALUE_conv(PARAM, T_INTEGER);
	value = PARAM->_integer.value;

	if (!(value >= 0 && value <= 255))
	{
		THROW(E_ARG);
	}

	STRING_char_value(PARAM, value);
}
/* Returns the code of the byte at a 1-based position in a string.
   The position defaults to 1 and is read from the second parameter when
   two are given. The result is 0 when the string check reports true or
   when the position is outside the string. */
void SUBR_asc(void)
{
	int result = 0;
	int at;

	SUBR_ENTER();

	if (!SUBR_check_string(PARAM))
	{
		at = 1;
		if (NPARAM == 2)
		{
			SUBR_check_integer(&PARAM[1]);
			at = PARAM[1]._integer.value;
		}

		if (at >= 1 && at <= PARAM->_string.len)
			result = (unsigned char)PARAM->_string.addr[PARAM->_string.start + at - 1];
	}

	RETURN->type = T_INTEGER;
	RETURN->_integer.value = result;

	SUBR_LEAVE();
}
/* Searches for PARAM[1] inside PARAM[0] and returns what STRING_search()
   reports, or 0 when either string check reports true or the pattern is
   longer than the source. The search runs from the right when the opcode
   is CODE_RINSTR. The optional third parameter is the start position and
   the optional fourth one selects a GB_COMP_TEXT comparison. */
void SUBR_instr(void)
{
	boolean backward, ignore_case = FALSE;
	int from;
	int found = 0;
	char *source, *pattern;
	int source_len, pattern_len;

	SUBR_ENTER();

	if (!SUBR_check_string(PARAM) && !SUBR_check_string(&PARAM[1]))
	{
		pattern_len = PARAM[1]._string.len;
		source_len = PARAM->_string.len;
		backward = ((EXEC_code >> 8) == CODE_RINSTR);

		/* A pattern longer than the source can never be found. */
		if (pattern_len <= source_len)
		{
			from = 0;
			if (NPARAM >= 3)
				from = SUBR_get_integer(&PARAM[2]);

			if (NPARAM == 4)
				ignore_case = SUBR_get_integer(&PARAM[3]) == GB_COMP_TEXT;

			source = PARAM->_string.addr + PARAM->_string.start;
			pattern = PARAM[1]._string.addr + PARAM[1]._string.start;

			found = STRING_search(source, source_len, pattern, pattern_len, from, backward, ignore_case);
		}
	}

	RETURN->type = T_INTEGER;
	RETURN->_integer.value = found;

	SUBR_LEAVE();
}
/* Returns a boolean (-1 or 0) telling whether the string PARAM[0]
   matches the pattern PARAM[1] according to REGEXP_match(). */
void SUBR_like(void)
{
	char *text, *mask;
	int text_len, mask_len;
	boolean matched;

	SUBR_ENTER_PARAM(2);

	SUBR_get_string_len(&PARAM[0], &text, &text_len);
	SUBR_get_string_len(&PARAM[1], &mask, &mask_len);

	if (REGEXP_match(mask, mask_len, text, text_len))
		matched = -1;
	else
		matched = 0;

	RETURN->type = T_BOOLEAN;
	RETURN->_boolean.value = matched;

	SUBR_LEAVE();
}
/* Arguments of the SUBR_subst() call in progress, read by get_subst(). */
static int subst_nparam;
static VALUE *subst_param;

/* Callback handed to STRING_subst(): fetches substitution argument number
   np. Index 0 and indices at or beyond the argument count give an empty
   result (NULL pointer, zero length). */
static void get_subst(int np, char **str, int *len)
{
	if (np <= 0 || np >= subst_nparam)
	{
		*str = NULL;
		*len = 0;
		return;
	}

	VALUE_get_string(&subst_param[np], str, len);
}
/* Substitutes the arguments PARAM[1..] into the pattern PARAM[0] through
   STRING_subst(), using get_subst() to fetch each argument. */
void SUBR_subst(void)
{
	char *pattern;
	int pattern_len;
	char *result;
	int n;

	SUBR_ENTER();

	SUBR_get_string_len(&PARAM[0], &pattern, &pattern_len);

	/* Every substitution argument must be a string before it is fetched. */
	n = 1;
	while (n < NPARAM)
	{
		VALUE_conv_string(&PARAM[n]);
		n++;
	}

	/* Publish the arguments for the callback. */
	subst_nparam = NPARAM;
	subst_param = PARAM;

	result = STRING_subst(pattern, pattern_len, get_subst);

	RETURN->type = T_STRING;
	RETURN->_string.addr = result;
	RETURN->_string.start = 0;
	RETURN->_string.len = STRING_length(result);

	SUBR_LEAVE();
}
/* Replaces every occurrence of the pattern PARAM[1] in the string PARAM[0]
   by PARAM[2] and returns the result. The optional fourth parameter
   selects a GB_COMP_TEXT comparison.

   An empty pattern leaves the source unchanged: the source is copied to
   the result as is, instead of producing an empty string. */
void SUBR_replace(void)
{
	char *ps;
	char *pp;
	char *pr;
	int ls, lp, lr;
	int pos;
	bool nocase = FALSE;

	SUBR_ENTER();

	SUBR_get_string_len(&PARAM[0], &ps, &ls);
	SUBR_get_string_len(&PARAM[1], &pp, &lp);
	SUBR_get_string_len(&PARAM[2], &pr, &lr);

	if (NPARAM == 4)
		nocase = SUBR_get_integer(&PARAM[3]) == GB_COMP_TEXT;

	STRING_start_len(ls);

	if (ls > 0)
	{
		if (lp > 0)
		{
			for (;;)
			{
				/* Positions returned by STRING_search() are 1-based; 0 means not found. */
				pos = STRING_search(ps, ls, pp, lp, 1, FALSE, nocase);
				if (pos == 0)
					break;

				pos--;

				/* Copy what precedes the match, then the replacement. */
				if (pos > 0)
				{
					STRING_make(ps, pos);
				}

				STRING_make(pr, lr);

				/* Continue right after the matched pattern. */
				pos += lp;
				ps += pos;
				ls -= pos;

				if (ls <= 0)
					break;
			}
		}

		/* Copy the remainder: the whole source when nothing was replaced. */
		STRING_make(ps, ls);
	}

	RETURN->type = T_STRING;
	RETURN->_string.addr = STRING_end_temp();
	RETURN->_string.start = 0;
	RETURN->_string.len = STRING_length(RETURN->_string.addr);

	SUBR_LEAVE();
}
/* Splits the string PARAM[0] into a new string array through CARRAY_split().
   Optional parameters, in order: the separators, the escape characters and
   a boolean forwarded to CARRAY_split() as its last argument. An empty
   source string gives an empty array. */
void SUBR_split(void)
{
	CARRAY *result;
	char *text;
	int text_len;
	char *separators = "";
	char *escapes = "";
	bool skip_void = FALSE;

	SUBR_ENTER();

	SUBR_get_string_len(&PARAM[0], &text, &text_len);

	if (NPARAM >= 2)
		separators = SUBR_get_string(&PARAM[1]);

	if (NPARAM >= 3)
		escapes = SUBR_get_string(&PARAM[2]);

	if (NPARAM == 4)
	{
		VALUE_conv(&PARAM[3], T_BOOLEAN);
		skip_void = PARAM[3]._boolean.value;
	}

	OBJECT_create((void **)(void *)&result, CLASS_StringArray, NULL, NULL, 0);

	if (text_len != 0)
	{
		/* Hold a reference on the non-empty option strings during the split. */
		if (*separators)
		{
			STRING_ref(separators);
		}
		if (*escapes)
		{
			STRING_ref(escapes);
		}

		CARRAY_split(result, text, text_len, separators, escapes, skip_void);

		if (*separators)
		{
			STRING_unref(&separators);
		}
		if (*escapes)
		{
			STRING_unref(&escapes);
		}
	}

	RETURN->_object.class = CLASS_StringArray;
	RETURN->_object.object = result;

	SUBR_LEAVE();
}
/* Scans the string PARAM[0] against the pattern PARAM[1] with REGEXP_scan()
   and returns the resulting string array. The array stays empty when either
   the string or the pattern is empty. */
void SUBR_scan(void)
{
	CARRAY *result;
	char *text, *mask;
	int text_len, mask_len;

	SUBR_ENTER_PARAM(2);

	SUBR_get_string_len(&PARAM[0], &text, &text_len);
	SUBR_get_string_len(&PARAM[1], &mask, &mask_len);

	OBJECT_create((void **)(void *)&result, CLASS_StringArray, NULL, NULL, 0);

	if (text_len != 0 && mask_len != 0)
	{
		REGEXP_scan(result, mask, mask_len, text, text_len);
	}

	RETURN->_object.class = CLASS_StringArray;
	RETURN->_object.object = result;

	SUBR_LEAVE();
}
/* Converts the string PARAM[0] from the charset PARAM[1] to the charset
   PARAM[2] with STRING_conv(). Returns a null value when the conversion
   yields no string. */
void SUBR_iconv(void)
{
	char *text;
	int text_len;
	const char *from;
	const char *to;
	char *converted;

	SUBR_ENTER_PARAM(3);

	text = SUBR_get_string(&PARAM[0]);
	text_len = PARAM[0]._string.len;

	from = SUBR_get_string(&PARAM[1]);
	to = SUBR_get_string(&PARAM[2]);

	STRING_conv(&converted, text, text_len, from, to, TRUE);

	if (converted)
	{
		RETURN->type = T_STRING;
		RETURN->_string.addr = converted;
		RETURN->_string.start = 0;
		RETURN->_string.len = STRING_length(converted);
	}
	else
	{
		RETURN->type = T_NULL;
	}

	SUBR_LEAVE();
}
/* Converts a string between UTF-8 and the charset named by LOCAL_encoding.
   A non-zero low nibble of EXEC_code converts from LOCAL_encoding to UTF-8;
   zero converts from UTF-8 to LOCAL_encoding. When LOCAL_is_UTF8 is set the
   function returns at once and the parameter is left untouched. Returns a
   null value when the conversion yields no string. */
void SUBR_sconv(void)
{
	char *text;
	int text_len;
	const char *from;
	const char *to;
	char *converted;

	SUBR_ENTER_PARAM(1);

	if (LOCAL_is_UTF8)
		return;

	text = SUBR_get_string(&PARAM[0]);
	text_len = PARAM[0]._string.len;

	from = "UTF-8";
	to = LOCAL_encoding;

	if (EXEC_code & 0xF)
	{
		/* Opposite direction: local charset towards UTF-8. */
		to = from;
		from = LOCAL_encoding;
	}

	STRING_conv(&converted, text, text_len, from, to, TRUE);

	if (converted)
	{
		RETURN->type = T_STRING;
		RETURN->_string.addr = converted;
		RETURN->_string.start = 0;
		RETURN->_string.len = STRING_length(converted);
	}
	else
	{
		RETURN->type = T_NULL;
	}

	SUBR_LEAVE();
}
/* Returns 1 when c lies in the 7-bit range 0..127, 0 otherwise. */
static int _is_ascii(int c)
{
	return c >= 0 && c <= 0x7F;
}
/* Returns 1 when c is an unaccented letter 'A'..'Z' or 'a'..'z', 0 otherwise. */
static int _is_letter(int c)
{
	if (c >= 'A' && c <= 'Z')
		return 1;

	return c >= 'a' && c <= 'z';
}
/* Returns 1 when c is a lowercase letter 'a'..'z', 0 otherwise. */
static int _is_lower(int c)
{
	if (c < 'a')
		return 0;

	return c <= 'z';
}
/* Returns 1 when c is an uppercase letter 'A'..'Z', 0 otherwise. */
static int _is_upper(int c)
{
	return !(c < 'A' || c > 'Z');
}
/* Returns 1 when c is a decimal digit '0'..'9', 0 otherwise. */
static int _is_digit(int c)
{
	/* Unsigned subtraction wraps every value below '0' to a large number,
	   so one comparison covers both bounds. */
	return ((unsigned int)c - (unsigned int)'0') <= 9u;
}
/* Returns 1 when c is a hexadecimal digit ('0'..'9', 'a'..'f', 'A'..'F'),
   0 otherwise. */
static int _is_hexa(int c)
{
	if (c >= '0' && c <= '9')
		return 1;

	if (c >= 'A' && c <= 'F')
		return 1;

	return c >= 'a' && c <= 'f';
}
/* Returns non-zero when c is a whitespace character: space, newline,
   carriage return, tab, form feed or vertical tab.

   The characters are compared explicitly instead of being searched with
   strchr(): strchr() also matches the terminating NUL of the set, which
   made a NUL byte count as whitespace. */
static int _is_space(int c)
{
	switch (c)
	{
		case ' ':
		case '\n':
		case '\r':
		case '\t':
		case '\f':
		case '\v':
			return 1;

		default:
			return 0;
	}
}
/* Returns 1 when c is a space (code 32) or a tab, 0 otherwise. */
static int _is_blank(int c)
{
	switch (c)
	{
		case 32:
		case '\t':
			return 1;

		default:
			return 0;
	}
}
/* Returns 1 when c is in the range 33..127 and is neither a letter nor a
   digit, 0 otherwise. */
static int _is_punct(int c)
{
	if (c <= 32 || c >= 128)
		return 0;

	return !_is_letter(c) && !_is_digit(c);
}
/* Tells whether every byte of the parameter string satisfies the character
   predicate selected by the low six bits of EXEC_code.

   The result is a boolean: -1 when the string is not empty and all its
   bytes satisfy the predicate, 0 otherwise. A selector that has no
   predicate (slot 0, or a value beyond the table) also gives 0 instead of
   calling through an invalid pointer. */
void SUBR_is_chr(void)
{
	/* Typed function-pointer table: ISO C does not define conversions
	   between void * and function pointers. */
	static int (*const jump[])(int) =
	{
		NULL, _is_ascii, _is_letter, _is_lower, _is_upper, _is_digit, _is_hexa, _is_space, _is_blank, _is_punct
	};

	char *addr;
	int len;
	int i;
	unsigned int which;
	int all_match = 0;
	int (*func)(int);

	SUBR_ENTER_PARAM(1);

	VALUE_conv_string(PARAM);
	SUBR_get_string_len(PARAM, &addr, &len);

	which = EXEC_code & 0x3F;
	func = (which < sizeof(jump) / sizeof(jump[0])) ? jump[which] : NULL;

	if (func && len > 0)
	{
		/* Stop at the first byte that fails the predicate. */
		for (i = 0; i < len; i++)
		{
			if (!(*func)(addr[i]))
				break;
		}

		all_match = (i >= len);
	}

	RETURN->type = T_BOOLEAN;
	RETURN->_boolean.value = all_match ? -1 : 0;

	SUBR_LEAVE();
}
/* Translates the parameter string with LOCAL_gettext() and returns the
   translation as a T_CSTRING value. Returns the void string when the
   string check reports true. */
void SUBR_tr(void)
{
	char *str;

	SUBR_ENTER_PARAM(1);

	VALUE_conv_string(&PARAM[0]);

	if (!SUBR_check_string(PARAM))
	{
		/* LOCAL_gettext() is given a temporary copy of the parameter. */
		STRING_new_temp(&str, &PARAM->_string.addr[PARAM->_string.start], PARAM->_string.len);

		RETURN->type = T_CSTRING;
		RETURN->_string.addr = (char *)LOCAL_gettext(str);
		RETURN->_string.start = 0;
		RETURN->_string.len = strlen(RETURN->_string.addr);
	}
	else
	{
		STRING_void_value(RETURN);
	}

	SUBR_LEAVE();
}
/* Quotes the parameter string. The low two bits of EXEC_code select the
   flavour:

   - 1: shell quoting. The string is first converted from UTF-8 to
     LOCAL_encoding when LOCAL_is_UTF8 is not set. Newline, carriage return
     and tab become $'\n', $'\r' and $'\t'; other bytes below ' ' become
     $'\xHH'; letters, digits, the characters ".-/_~" and bytes above 126
     are copied; anything else is preceded by a backslash.
   - 2: HTML quoting. '&', '<', '>' and '"' become entities.
   - 0 and 3: double-quoted string. '"', '\\' and bytes below ' ' are
     escaped with a backslash, as \n, \r, \t or \xHH where applicable.

   The flavour is chosen with a standard switch, and the set lookup uses
   strchr() rather than the obsolete index(). */
void SUBR_quote(void)
{
	char *str;
	int lstr;
	int i;
	unsigned char c;
	char buf[8];

	SUBR_ENTER_PARAM(1);

	VALUE_conv_string(&PARAM[0]);

	str = PARAM->_string.addr + PARAM->_string.start;
	lstr = PARAM->_string.len;

	STRING_start_len(lstr);

	switch (EXEC_code & 0x3)
	{
		case 1: /* shell */
		{
			if (!LOCAL_is_UTF8)
			{
				char *conv;

				STRING_conv(&conv, str, lstr, "UTF-8", LOCAL_encoding, FALSE);
				str = conv;
				lstr = str ? strlen(str) : 0;
			}

			for (i = 0; i < lstr; i++)
			{
				c = str[i];

				if (c == '\n')
				{
					STRING_make("$'\\n'", 5);
				}
				else if (c == '\r')
				{
					STRING_make("$'\\r'", 5);
				}
				else if (c == '\t')
				{
					STRING_make("$'\\t'", 5);
				}
				else if (c < ' ')
				{
					/* 7 characters plus the terminator fit exactly in buf. */
					snprintf(buf, sizeof(buf), "$'\\x%02X'", c);
					STRING_make(buf, 7);
				}
				else if ((c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || strchr(".-/_~", c) || c > 126)
				{
					/* c is never 0 here, so strchr() cannot match the terminator. */
					STRING_make_char(c);
				}
				else
				{
					STRING_make_char('\\');
					STRING_make_char(c);
				}
			}

			break;
		}

		case 2: /* HTML */
		{
			for (i = 0; i < lstr; i++)
			{
				c = str[i];

				if (c == '&')
				{
					STRING_make("&amp;", 5);
				}
				else if (c == '<')
				{
					STRING_make("&lt;", 4);
				}
				else if (c == '>')
				{
					STRING_make("&gt;", 4);
				}
				else if (c == '"')
				{
					STRING_make("&quot;", 6);
				}
				else
				{
					STRING_make_char(c);
				}
			}

			break;
		}

		default: /* 0 and 3: double-quoted string */
		{
			STRING_make_char('"');

			for (i = 0; i < lstr; i++)
			{
				c = str[i];

				/* Bytes above 126 are deliberately kept unquoted. */
				if (c >= ' ' && c != '\\' && c != '"')
				{
					STRING_make_char(c);
				}
				else
				{
					STRING_make_char('\\');

					if (c == '\n')
					{
						STRING_make_char('n');
					}
					else if (c == '\r')
					{
						STRING_make_char('r');
					}
					else if (c == '\t')
					{
						STRING_make_char('t');
					}
					else if (c == '"')
					{
						STRING_make_char('"');
					}
					else if (c == '\\')
					{
						STRING_make_char('\\');
					}
					else
					{
						snprintf(buf, sizeof(buf), "x%02X", c);
						STRING_make(buf, 3);
					}
				}
			}

			STRING_make_char('"');
			break;
		}
	}

	RETURN->type = T_STRING;
	RETURN->_string.addr = STRING_end_temp();
	RETURN->_string.start = 0;
	RETURN->_string.len = STRING_length(RETURN->_string.addr);

	SUBR_LEAVE();
}
/* Returns the value (0..15) of the hexadecimal digit c.
   Any character that is not a hexadecimal digit gives 0. */
static int read_hex_digit(unsigned char c)
{
	int value = 0;

	if (c >= 'a' && c <= 'f')
		value = c - 'a' + 10;
	else if (c >= 'A' && c <= 'F')
		value = c - 'A' + 10;
	else if (c >= '0' && c <= '9')
		value = c - '0';

	return value;
}
/* Reverses the double-quote flavour of quoting: strips one pair of
   surrounding double quotes when both are present, then decodes the
   backslash escapes \n, \t, \r and \xHH. Any other escaped character is
   copied as is. Decoding stops at a trailing lone backslash or at a \x
   escape that lacks its two digits. */
void SUBR_unquote(void)
{
	char *src;
	int count;
	int pos;
	unsigned char ch;

	SUBR_ENTER_PARAM(1);

	VALUE_conv_string(&PARAM[0]);

	src = PARAM->_string.addr + PARAM->_string.start;
	count = PARAM->_string.len;

	STRING_start_len(count);

	if (count >= 2 && src[0] == '"' && src[count - 1] == '"')
	{
		src++;
		count -= 2;
	}

	pos = 0;
	while (pos < count)
	{
		ch = src[pos];

		if (ch == '\\')
		{
			/* A backslash at the very end has nothing to escape. */
			if (++pos >= count)
				break;

			ch = src[pos];

			switch (ch)
			{
				case 'n':
					ch = '\n';
					break;

				case 't':
					ch = '\t';
					break;

				case 'r':
					ch = '\r';
					break;

				case 'x':
					/* Two more characters are needed after the 'x'. */
					if (pos >= (count - 2))
						goto __DONE;
					ch = (read_hex_digit(src[pos + 1]) << 4) + read_hex_digit(src[pos + 2]);
					pos += 2;
					break;

				default:
					break;
			}
		}

		STRING_make_char(ch);
		pos++;
	}

__DONE:

	RETURN->type = T_STRING;
	RETURN->_string.addr = STRING_end_temp();
	RETURN->_string.start = 0;
	RETURN->_string.len = STRING_length(RETURN->_string.addr);

	SUBR_LEAVE();
}