2007-12-30 17:41:49 +01:00
|
|
|
/***************************************************************************
|
|
|
|
|
2011-09-08 18:01:36 +02:00
|
|
|
gbx_c_string.c
|
2007-12-30 17:41:49 +01:00
|
|
|
|
2011-09-08 18:01:36 +02:00
|
|
|
(c) 2000-2011 Benoît Minisini <gambas@users.sourceforge.net>
|
2007-12-30 17:41:49 +01:00
|
|
|
|
2011-09-08 18:01:36 +02:00
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
|
|
it under the terms of the GNU General Public License as published by
|
|
|
|
the Free Software Foundation; either version 2, or (at your option)
|
|
|
|
any later version.
|
2007-12-30 17:41:49 +01:00
|
|
|
|
2011-09-08 18:01:36 +02:00
|
|
|
This program is distributed in the hope that it will be useful,
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
GNU General Public License for more details.
|
2007-12-30 17:41:49 +01:00
|
|
|
|
2011-09-08 18:01:36 +02:00
|
|
|
You should have received a copy of the GNU General Public License
|
|
|
|
along with this program; if not, write to the Free Software
|
|
|
|
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
2011-06-03 02:51:09 +02:00
|
|
|
MA 02110-1301, USA.
|
2007-12-30 17:41:49 +01:00
|
|
|
|
|
|
|
***************************************************************************/
|
|
|
|
|
|
|
|
#define __GBX_C_STRING_C
|
|
|
|
|
|
|
|
#include "gbx_info.h"
|
|
|
|
|
|
|
|
#ifndef GBX_INFO
|
|
|
|
|
|
|
|
#include "gb_common.h"
|
|
|
|
|
|
|
|
#include <wctype.h>
|
|
|
|
#include <wchar.h>
|
|
|
|
#include <iconv.h>
|
|
|
|
|
|
|
|
#include "gb_error.h"
|
|
|
|
#include "gb_table.h"
|
|
|
|
#include "gbx_string.h"
|
|
|
|
#include "gbx_api.h"
|
|
|
|
#include "gbx_exec.h"
|
2011-09-08 18:01:36 +02:00
|
|
|
#include "gbx_subr.h"
|
2007-12-30 17:41:49 +01:00
|
|
|
#include "gbx_compare.h"
|
|
|
|
#include "gambas.h"
|
|
|
|
|
|
|
|
#include "gbx_c_string.h"
|
|
|
|
|
2011-09-08 18:01:36 +02:00
|
|
|
/* Sentinel returned by utf8_get_unicode() when a sequence is rejected (see its _INVALID exit). */
#define UNICODE_INVALID 0xFFFFFFFFU
|
2007-12-30 17:41:49 +01:00
|
|
|
|
|
|
|
/* Length in bytes of a UTF-8 sequence, indexed by the value of its first byte.
   Bytes that cannot start a sequence (0x80-0xBF, 0xFE, 0xFF) are given a length of 1. */
static const char _char_length[256] =
{
	/* 0x00 - 0x0F */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
	/* 0x10 - 0x1F */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
	/* 0x20 - 0x2F */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
	/* 0x30 - 0x3F */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
	/* 0x40 - 0x4F */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
	/* 0x50 - 0x5F */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
	/* 0x60 - 0x6F */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
	/* 0x70 - 0x7F */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
	/* 0x80 - 0x8F */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
	/* 0x90 - 0x9F */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
	/* 0xA0 - 0xAF */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
	/* 0xB0 - 0xBF */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
	/* 0xC0 - 0xCF */ 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
	/* 0xD0 - 0xDF */ 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
	/* 0xE0 - 0xEF */ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
	/* 0xF0 - 0xFF */ 4,4,4,4,4,4,4,4,5,5,5,5,6,6,1,1
};
|
|
|
|
|
2011-09-08 18:01:36 +02:00
|
|
|
/***************************************************************************/
|
2007-12-30 17:41:49 +01:00
|
|
|
|
2011-09-08 18:01:36 +02:00
|
|
|
/* Looks up the sequence length for lead byte _c in _char_length; the cast keeps the index in 0..255 even when _c is a signed char. */
#define utf8_get_char_length(_c) (_char_length[(unsigned char)(_c)])
|
2011-08-27 21:49:48 +02:00
|
|
|
|
2011-09-08 18:01:36 +02:00
|
|
|
/* Function form of the utf8_get_char_length() macro: returns the length in
   bytes of the UTF-8 sequence whose first byte is 'c'. */
int STRING_get_utf8_char_length(unsigned char c)
{
	int length = utf8_get_char_length(c);

	return length;
}
|
|
|
|
|
2011-09-08 18:01:36 +02:00
|
|
|
/* Counts the characters in the first 'len' bytes of 'str'.
   Every byte that is not a continuation byte (10xxxxxx) counts as one character.
   Returns 0 when 'len' is zero or negative. */
static int utf8_get_length(const char *str, int len)
{
	const char *p;
	const char *end;
	int count = 0;

	if (len <= 0)
		return 0;

	end = str + len;

	for (p = str; p < end; p++)
		count += ((*p & 0xC0) != 0x80) ? 1 : 0;

	return count;
}
|
|
|
|
|
2011-09-08 18:01:36 +02:00
|
|
|
static uint utf8_get_unicode(char *str, int len)
|
2007-12-30 17:41:49 +01:00
|
|
|
{
|
2011-09-08 18:01:36 +02:00
|
|
|
uint unicode;
|
|
|
|
|
|
|
|
switch (len)
|
|
|
|
{
|
|
|
|
case 2:
|
|
|
|
unicode = (str[1] & 0x3F) + ((str[0] & 0x1F) << 6);
|
|
|
|
if (unicode < 0x80)
|
|
|
|
goto _INVALID;
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 3:
|
|
|
|
unicode = (str[2] & 0x3F) + ((str[1] & 0x3F) << 6) + ((str[0] & 0xF) << 12);
|
|
|
|
if (unicode < 0x800)
|
|
|
|
goto _INVALID;
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 4:
|
|
|
|
unicode = (str[3] & 0x3F) + ((str[2] & 0x3F) << 6) + ((str[1] & 0x3F) << 12) + ((str[0] & 0x7) << 18);
|
|
|
|
if (unicode < 0x10000)
|
|
|
|
goto _INVALID;
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 5:
|
|
|
|
unicode = (str[4] & 0x3F) + ((str[3] & 0x3F) << 6) + ((str[2] & 0x3F) << 12) + ((str[1] & 0x3F) << 18) + ((str[0] & 0x3) << 24);
|
|
|
|
if (unicode < 0x200000)
|
|
|
|
goto _INVALID;
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 6:
|
|
|
|
unicode = (str[5] & 0x3F) + ((str[4] & 0x3F) << 6) + ((str[3] & 0x3F) << 12) + ((str[2] & 0x3F) << 18) + ((str[1] & 0x3F) << 24) + ((str[0] & 0x1) << 30);
|
|
|
|
if (unicode < 0x4000000)
|
|
|
|
goto _INVALID;
|
|
|
|
break;
|
|
|
|
|
|
|
|
default:
|
|
|
|
unicode = str[0];
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
return unicode;
|
|
|
|
|
|
|
|
_INVALID:
|
2007-12-30 17:41:49 +01:00
|
|
|
|
2011-09-08 18:01:36 +02:00
|
|
|
return UNICODE_INVALID;
|
|
|
|
}
|
2007-12-30 17:41:49 +01:00
|
|
|
|
2011-09-08 18:01:36 +02:00
|
|
|
/***************************************************************************/
|
2007-12-30 17:41:49 +01:00
|
|
|
|
2011-09-08 18:01:36 +02:00
|
|
|
/* Address of the string whose character offsets are currently cached in
   _utf8_pos[]. The cache is reset whenever utf8_get_pos() is called on a
   different address. */
char *STRING_utf8_current = NULL;

/* Number of character offsets that can be cached. */
#define UTF8_POS_COUNT 256

/* _utf8_pos[i] is the byte offset of character i, for 0 <= i <= _utf8_last_pos. */
static short _utf8_pos[UTF8_POS_COUNT];
static int _utf8_last_pos;

/* Returns the byte offset of the character number 'index' (zero-based) inside
   the 'len' bytes starting at 'str'.

   - Returns 0 when 'index' is zero or negative.
   - 'index' is first limited to 'len', as a string cannot hold more
     characters than bytes.
   - The result is never greater than 'len': when the string has fewer than
     'index' characters, or ends with a truncated sequence, 'len' is returned
     and no byte at or after str[len] is read.

   Offsets of the first UTF8_POS_COUNT characters are memorized so that
   successive calls on the same string do not rescan it from the start. */
static int utf8_get_pos(const char *str, int len, int index)
{
	int i, pos;

	if (index <= 0)
		return 0;

	if (str != STRING_utf8_current)
	{
		STRING_utf8_current = (char *)str;
		_utf8_last_pos = 0;
		_utf8_pos[0] = 0;
	}

	if (index > len)
		index = len;

	/* Only reachable when 'len' is zero or negative. */
	if (index <= 0)
		return 0;

	if (index <= _utf8_last_pos)
	{
		/* The cached offset may have been computed with a larger 'len'. */
		pos = _utf8_pos[index];
		return pos > len ? len : pos;
	}

	pos = _utf8_pos[_utf8_last_pos];

	while (_utf8_last_pos < (UTF8_POS_COUNT - 1))
	{
		if (pos >= len)
			return len;

		/* The offset is cached unclamped, as it only depends on the lead byte. */
		pos += utf8_get_char_length(str[pos]);
		_utf8_pos[++_utf8_last_pos] = pos;

		if (index == _utf8_last_pos)
			return pos > len ? len : pos;
	}

	/* Beyond the cache capacity: plain scan. */
	for (i = UTF8_POS_COUNT - 1; i < index; i++)
	{
		if (pos >= len)
			return len;

		pos += utf8_get_char_length(str[pos]);
	}

	return pos > len ? len : pos;
}
|
|
|
|
|
2011-09-08 18:01:36 +02:00
|
|
|
/***************************************************************************/
|
|
|
|
|
2008-01-17 22:39:26 +01:00
|
|
|
/* Converts a byte count into a character count: returns the number of
   characters found in the first 'byte' bytes of 'str', 'byte' being limited
   to 'len'. Returns 0 when 'byte' is zero or negative. */
static int byte_to_index(const char *str, int len, int byte)
{
	int limit;

	if (byte <= 0)
		return 0;

	limit = (byte > len) ? len : byte;

	return utf8_get_length(str, limit);
}
|
|
|
|
|
2008-01-17 22:39:26 +01:00
|
|
|
/* Converts a one-based character index into a one-based byte position.
   Returns 0 when 'index' is zero or negative. */
static int index_to_byte(const char *str, int len, int index)
{
	return (index <= 0) ? 0 : utf8_get_pos(str, len, index - 1) + 1;
}
|
|
|
|
|
|
|
|
|
2011-08-27 21:49:48 +02:00
|
|
|
BEGIN_METHOD(String_Pos, GB_STRING str; GB_INTEGER index)
|
2007-12-30 17:41:49 +01:00
|
|
|
|
2011-09-08 18:01:36 +02:00
|
|
|
GB_ReturnInteger(index_to_byte(STRING(str), LENGTH(str), VARG(index)));
|
2007-12-30 17:41:49 +01:00
|
|
|
|
|
|
|
END_METHOD
|
|
|
|
|
|
|
|
|
2011-08-27 21:49:48 +02:00
|
|
|
BEGIN_METHOD(String_Len, GB_STRING str)
|
2007-12-30 17:41:49 +01:00
|
|
|
|
2011-09-08 18:01:36 +02:00
|
|
|
GB_ReturnInteger(utf8_get_length(STRING(str), LENGTH(str)));
|
2007-12-30 17:41:49 +01:00
|
|
|
|
|
|
|
END_METHOD
|
|
|
|
|
|
|
|
|
2011-08-27 21:49:48 +02:00
|
|
|
BEGIN_METHOD(String_Index, GB_STRING str; GB_INTEGER pos)
|
2007-12-30 17:41:49 +01:00
|
|
|
|
2011-09-08 18:01:36 +02:00
|
|
|
GB_ReturnInteger(byte_to_index(STRING(str), LENGTH(str), VARG(pos)));
|
2007-12-30 17:41:49 +01:00
|
|
|
|
|
|
|
END_METHOD
|
|
|
|
|
|
|
|
|
2011-09-08 18:01:36 +02:00
|
|
|
/* String.Mid(String, Start [, Length]): extracts a substring counted in
   characters, by adjusting the start and length of the string argument in place.

   - Start is one-based; a value lower than 1 raises E_ARG.
   - Without Length, everything from Start to the end is returned.
   - A negative Length is counted from the end of the string.
   - The resulting byte length never exceeds the bytes remaining after Start,
     even if the last character is truncated. */
static void String_Mid(ushort code)
{
	char *str;
	int start, length;
	int len, pos, nbytes, remaining;
	bool null;

	SUBR_ENTER();

	null = SUBR_check_string(PARAM);

	VALUE_conv_integer(&PARAM[1]);
	start = PARAM[1]._integer.value - 1;

	if (start < 0)
		THROW(E_ARG);

	if (null)
		goto _SUBR_MID_FIN;

	str = PARAM->_string.addr + PARAM->_string.start;
	len = PARAM->_string.len;

	/* Byte offset of the first character to extract. */
	pos = utf8_get_pos(str, len, start);
	if (pos >= len)
	{
		VOID_STRING(PARAM);
		goto _SUBR_MID_FIN;
	}

	PARAM->_string.start += pos;
	remaining = len - pos;

	if (NPARAM == 2)
	{
		nbytes = remaining;
	}
	else
	{
		VALUE_conv_integer(&PARAM[2]);
		length = PARAM[2]._integer.value;

		if (length < 0)
			length += utf8_get_length(str, len) - start;

		if (length == 1)
			nbytes = utf8_get_char_length(str[pos]);
		else if (length >= remaining)
			/* There cannot be more characters left than bytes left: take
			   everything, and avoid overflowing 'start + length'. */
			nbytes = remaining;
		else
			nbytes = utf8_get_pos(str, len, start + length) - pos;

		if (nbytes > remaining)
			nbytes = remaining;
	}

	if (nbytes <= 0)
	{
		VOID_STRING(PARAM);
	}
	else
		PARAM->_string.len = nbytes;

_SUBR_MID_FIN:

	SP -= NPARAM;
	SP++;
}
|
2007-12-30 17:41:49 +01:00
|
|
|
|
|
|
|
|
2011-09-08 18:01:36 +02:00
|
|
|
/* String.Left(String [, Length]): keeps the first 'Length' characters of the
   string argument, by adjusting its length in place.

   - Length defaults to 1.
   - A negative Length means "everything but the last -Length characters".
   - The resulting byte length never exceeds the length of the string. */
static void String_Left(ushort code)
{
	int val;
	char *str;
	int len, ulen;

	SUBR_ENTER();

	if (!SUBR_check_string(PARAM))
	{
		if (NPARAM == 1)
			val = 1;
		else
		{
			VALUE_conv_integer(&PARAM[1]);
			val = PARAM[1]._integer.value;
		}

		str = PARAM->_string.addr + PARAM->_string.start;
		len = PARAM->_string.len;

		if (val < 0)
			val += utf8_get_length(str, len);

		ulen = utf8_get_pos(str, len, val);

		/* Never extend the string past its own end. */
		if (ulen > len)
			ulen = len;

		PARAM->_string.len = ulen;
	}

	SP -= NPARAM;
	SP++;
}
|
2007-12-30 17:41:49 +01:00
|
|
|
|
|
|
|
|
2011-09-08 18:01:36 +02:00
|
|
|
/* String.Right(String [, Length]): keeps the last 'Length' characters of the
   string argument, by adjusting its start and length in place.

   - Length defaults to 1.
   - A negative Length means "everything but the first -Length characters".
   - The number of bytes skipped never exceeds the length of the string, so
     the resulting length cannot become negative. */
static void String_Right(ushort code)
{
	int val;
	char *str;
	int len, skip;

	SUBR_ENTER();

	if (!SUBR_check_string(PARAM))
	{
		if (NPARAM == 1)
			val = 1;
		else
		{
			VALUE_conv_integer(&PARAM[1]);
			val = PARAM[1]._integer.value;
		}

		str = PARAM->_string.addr + PARAM->_string.start;
		len = PARAM->_string.len;

		/* 'val' becomes the number of leading characters to drop. */
		if (val < 0)
			val = (-val);
		else
			val = utf8_get_length(str, len) - val;

		skip = utf8_get_pos(str, len, val);

		if (skip > len)
			skip = len;

		PARAM->_string.start += skip;
		PARAM->_string.len -= skip;
	}

	SP -= NPARAM;
	SP++;
}
|
2007-12-30 17:41:49 +01:00
|
|
|
|
|
|
|
|
2011-02-22 18:59:07 +01:00
|
|
|
/* Converts the 'len' bytes of the UTF-8 string 'str' into a wide-character
   string, and changes its case: upper case if 'upper' is set, lower case
   otherwise.

   On success, '*wstr' receives the converted buffer and '*wlen' its length in
   wide characters, as measured by wcslen(); FALSE is returned.
   An empty input gives a NULL buffer and a zero length.
   Returns TRUE if STRING_conv() reports a failure; the outputs are then left
   untouched. */
static bool convert_to_unicode(wchar_t **wstr, int *wlen, const char *str, int len, bool upper)
{
	char *converted;
	wchar_t *wide;
	wchar_t *p;
	int count;

	if (len == 0)
	{
		*wstr = NULL;
		*wlen = 0;
		return FALSE;
	}

	if (STRING_conv(&converted, str, len, "UTF-8", SC_UNICODE, FALSE))
		return TRUE;

	wide = (wchar_t *)converted;
	count = wcslen(wide);

	for (p = wide; p < wide + count; p++)
		*p = upper ? towupper(*p) : towlower(*p);

	*wstr = wide;
	*wlen = count;
	return FALSE;
}
|
|
|
|
|
2011-09-08 18:01:36 +02:00
|
|
|
|
2008-01-17 22:39:26 +01:00
|
|
|
/* Returns to the interpreter the upper case ('upper' set) or lower case
   version of the 'len' bytes of the UTF-8 string 'str'.

   The string is converted to wide characters, its case is changed, then it is
   converted back to UTF-8. If one of the conversions fails, a copy of the
   original string is returned instead. An empty string gives a null result. */
static void convert_string(char *str, int len, bool upper)
{
	char *temp = NULL;
	wchar_t *wtemp;
	int ltemp;

	if (len > 0)
	{
		if (convert_to_unicode(&wtemp, &ltemp, str, len, upper))
			goto __ERROR;

		if (STRING_conv(&temp, (char *)wtemp, ltemp * sizeof(wchar_t), SC_UNICODE, "UTF-8", FALSE))
			goto __ERROR;
	}

	GB_ReturnString(temp);
	return;

__ERROR:

	if (len > 0)
		GB_ReturnNewString(str, len);
	else
		GB_ReturnNull();
}
|
|
|
|
|
|
|
|
|
2011-08-27 21:49:48 +02:00
|
|
|
BEGIN_METHOD(String_Lower, GB_STRING str)
|
2007-12-30 17:41:49 +01:00
|
|
|
|
2011-09-08 18:01:36 +02:00
|
|
|
convert_string(STRING(str), LENGTH(str), FALSE);
|
2007-12-30 17:41:49 +01:00
|
|
|
|
|
|
|
END_METHOD
|
|
|
|
|
|
|
|
|
2011-08-27 21:49:48 +02:00
|
|
|
BEGIN_METHOD(String_Upper, GB_STRING str)
|
2007-12-30 17:41:49 +01:00
|
|
|
|
2011-09-08 18:01:36 +02:00
|
|
|
convert_string(STRING(str), LENGTH(str), TRUE);
|
2007-12-30 17:41:49 +01:00
|
|
|
|
|
|
|
END_METHOD
|
|
|
|
|
|
|
|
|
2011-08-27 21:49:48 +02:00
|
|
|
BEGIN_METHOD(String_Chr, GB_INTEGER code)
|
2007-12-30 17:41:49 +01:00
|
|
|
|
|
|
|
const char *charset = EXEC_big_endian ? "UCS-4BE" : "UCS-4LE";
|
|
|
|
char *temp;
|
|
|
|
|
|
|
|
STRING_conv(&temp, (char *)(&VARG(code)), sizeof(wchar_t), charset, "UTF-8", TRUE);
|
|
|
|
GB_ReturnString(temp);
|
|
|
|
|
|
|
|
END_METHOD
|
|
|
|
|
2011-08-27 21:49:48 +02:00
|
|
|
BEGIN_METHOD(String_Code, GB_STRING str; GB_INTEGER index)
|
2007-12-30 17:41:49 +01:00
|
|
|
|
2011-09-08 18:01:36 +02:00
|
|
|
char *str;
|
|
|
|
int len, index, pos, lc;
|
2007-12-30 17:41:49 +01:00
|
|
|
|
|
|
|
index = VARGOPT(index, 1);
|
|
|
|
if (index < 1)
|
|
|
|
{
|
|
|
|
GB_ReturnInteger(0);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2011-09-08 18:01:36 +02:00
|
|
|
str = STRING(str);
|
|
|
|
len = LENGTH(str);
|
|
|
|
pos = utf8_get_pos(str, len, index - 1);
|
|
|
|
lc = utf8_get_char_length(str[pos]);
|
|
|
|
|
|
|
|
GB_ReturnInteger(utf8_get_unicode(&str[pos], lc));
|
|
|
|
|
2007-12-30 17:41:49 +01:00
|
|
|
END_METHOD
|
|
|
|
|
2011-02-22 18:59:07 +01:00
|
|
|
/* Searches 'pattern' inside 'str' and returns to the interpreter the
   one-based character index of the match, or 0 if nothing was found.

   - 'start' is a one-based character index; zero or a negative value is
     passed to STRING_search() as 0.
   - 'right' is forwarded to STRING_search() (set by String.RInStr).
   - 'nocase' selects a case-insensitive search: both strings are converted to
     upper case wide characters and compared as raw bytes. 0 is returned if
     that conversion fails.

   The start offset given to STRING_search() is the one-based position of the
   first byte of the character number 'start' in the buffer actually searched:
   a UTF-8 byte position in the case-sensitive branch, a multiple of
   sizeof(wchar_t) plus one in the case-insensitive branch.
   NOTE(review): this assumes STRING_search() takes a one-based byte position,
   as the case-sensitive branch suggests - confirm against its definition. */
static void string_search(const char *str, int len, const char *pattern, int lenp, int start, bool right, bool nocase)
{
	int pos;

	if (!nocase)
	{
		if (start)
			start = index_to_byte(str, len, start);

		pos = STRING_search(str, len, pattern, lenp, start, right, FALSE);
		pos = byte_to_index(str, len, pos);
	}
	else
	{
		wchar_t *wstr;
		int lstr;
		wchar_t *wpattern;
		int lpattern;

		if (convert_to_unicode(&wstr, &lstr, str, len, TRUE))
			goto __ERROR;

		if (convert_to_unicode(&wpattern, &lpattern, pattern, lenp, TRUE))
			goto __ERROR;

		if (start <= 0)
			start = 0;
		else
		{
			/* Limit the index first so that the multiplication cannot overflow. */
			if (start > lstr)
				start = lstr + 1;

			start = (start - 1) * (int)sizeof(wchar_t) + 1;
		}

		pos = STRING_search((char *)wstr, lstr * sizeof(wchar_t), (char *)wpattern, lpattern * sizeof(wchar_t), start, right, FALSE);

		/* Back from a byte position to a character index. */
		if (pos)
			pos = (pos - 1) / sizeof(wchar_t) + 1;
	}

	GB_ReturnInteger(pos);
	return;

__ERROR:

	GB_ReturnInteger(0);
	return;
}
|
|
|
|
|
2011-08-27 21:49:48 +02:00
|
|
|
BEGIN_METHOD(String_Instr, GB_STRING str; GB_STRING pattern; GB_INTEGER start; GB_INTEGER mode)
|
2007-12-30 17:41:49 +01:00
|
|
|
|
2011-02-22 18:59:07 +01:00
|
|
|
string_search(STRING(str), LENGTH(str), STRING(pattern), LENGTH(pattern), VARGOPT(start, 0), FALSE, VARGOPT(mode, GB_COMP_BINARY) == GB_COMP_NOCASE);
|
2007-12-30 17:41:49 +01:00
|
|
|
|
|
|
|
END_METHOD
|
|
|
|
|
2011-08-27 21:49:48 +02:00
|
|
|
BEGIN_METHOD(String_RInstr, GB_STRING str; GB_STRING pattern; GB_INTEGER start; GB_INTEGER mode)
|
2007-12-30 17:41:49 +01:00
|
|
|
|
2011-02-22 18:59:07 +01:00
|
|
|
string_search(STRING(str), LENGTH(str), STRING(pattern), LENGTH(pattern), VARGOPT(start, 0), TRUE, VARGOPT(mode, GB_COMP_BINARY) == GB_COMP_NOCASE);
|
2007-12-30 17:41:49 +01:00
|
|
|
|
|
|
|
END_METHOD
|
|
|
|
|
2011-08-27 21:49:48 +02:00
|
|
|
BEGIN_METHOD(String_Comp, GB_STRING str1; GB_STRING str2; GB_INTEGER mode)
|
2007-12-30 17:41:49 +01:00
|
|
|
|
2010-02-08 00:10:41 +01:00
|
|
|
int mode = VARGOPT(mode, GB_COMP_BINARY) | GB_COMP_LANG;
|
|
|
|
bool nocase = (mode & GB_COMP_NOCASE) != 0;
|
2007-12-30 17:41:49 +01:00
|
|
|
|
2010-02-08 00:10:41 +01:00
|
|
|
if (mode & GB_COMP_NATURAL)
|
|
|
|
GB_ReturnInteger(COMPARE_string_natural(STRING(str1), LENGTH(str1), STRING(str2), LENGTH(str2), nocase));
|
|
|
|
else
|
|
|
|
GB_ReturnInteger(COMPARE_string_lang(STRING(str1), LENGTH(str1), STRING(str2), LENGTH(str2), nocase, TRUE));
|
2007-12-30 17:41:49 +01:00
|
|
|
|
|
|
|
END_METHOD
|
|
|
|
|
2011-08-27 21:49:48 +02:00
|
|
|
/* True if _char is an acceptable code point, i.e. if it is none of:
   - a value of 0x110000 or more;
   - a value in 0xD800..0xDFFF;
   - a value in 0xFDD0..0xFDEF;
   - a value whose low 16 bits are 0xFFFE or 0xFFFF. */
#define IS_VALID(_char) \
	(!((_char) >= 0x110000 \
	|| ((_char) & 0xFFFFF800) == 0xD800 \
	|| ((_char) >= 0xFDD0 && (_char) <= 0xFDEF) \
	|| ((_char) & 0xFFFE) == 0xFFFE))
|
2011-08-27 21:49:48 +02:00
|
|
|
|
|
|
|
BEGIN_METHOD(String_IsValid, GB_STRING str)
|
|
|
|
|
2011-09-08 18:01:36 +02:00
|
|
|
const uchar *str;
|
|
|
|
int len, lc;
|
2011-08-27 21:49:48 +02:00
|
|
|
uint unicode;
|
|
|
|
bool valid = FALSE;
|
|
|
|
int i;
|
|
|
|
|
2011-09-08 18:01:36 +02:00
|
|
|
str = (const uchar *)STRING(str);
|
|
|
|
len = LENGTH(str);
|
|
|
|
|
|
|
|
while (len)
|
2011-08-27 21:49:48 +02:00
|
|
|
{
|
2011-09-08 18:01:36 +02:00
|
|
|
lc = utf8_get_char_length(*str);
|
|
|
|
len -= lc;
|
|
|
|
if (len < 0)
|
2011-08-27 21:49:48 +02:00
|
|
|
goto _INVALID;
|
|
|
|
|
|
|
|
//for (i = 0; i < len; i++)
|
|
|
|
// fprintf(stderr, "%02X ", str[i]);
|
|
|
|
|
2011-09-08 18:01:36 +02:00
|
|
|
for (i = 1; i < lc; i++)
|
2011-08-27 21:49:48 +02:00
|
|
|
{
|
|
|
|
if ((str[i] & 0xC0) != 0x80)
|
|
|
|
goto _INVALID;
|
|
|
|
}
|
|
|
|
|
2011-09-08 18:01:36 +02:00
|
|
|
unicode = utf8_get_unicode((char *)str, lc);
|
|
|
|
if (unicode == UNICODE_INVALID)
|
|
|
|
goto _INVALID;
|
2011-08-27 21:49:48 +02:00
|
|
|
if (!IS_VALID(unicode))
|
|
|
|
goto _INVALID;
|
|
|
|
|
2011-09-08 18:01:36 +02:00
|
|
|
str += lc;
|
2011-08-27 21:49:48 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
valid = TRUE;
|
|
|
|
|
|
|
|
_INVALID:
|
|
|
|
|
|
|
|
GB_ReturnBoolean(valid);
|
|
|
|
//fprintf(stderr, "\n");
|
|
|
|
|
|
|
|
END_METHOD
|
|
|
|
|
2007-12-30 17:41:49 +01:00
|
|
|
#endif
|
|
|
|
|
2008-01-17 22:39:26 +01:00
|
|
|
GB_DESC NATIVE_String[] =
|
2007-12-30 17:41:49 +01:00
|
|
|
{
|
2011-09-08 18:01:36 +02:00
|
|
|
GB_DECLARE("String", 0), GB_VIRTUAL_CLASS(),
|
2007-12-30 17:41:49 +01:00
|
|
|
|
2011-09-08 18:01:36 +02:00
|
|
|
GB_STATIC_METHOD("Len", "i", String_Len, "(String)s"),
|
2011-08-27 21:49:48 +02:00
|
|
|
|
2011-09-08 18:01:36 +02:00
|
|
|
GB_STATIC_FAST_METHOD("Mid", "s", String_Mid, "(String)s(Start)i[(Length)i]"),
|
|
|
|
GB_STATIC_FAST_METHOD("Mid$", "s", String_Mid, "(String)s(Start)i[(Length)i]"),
|
|
|
|
GB_STATIC_FAST_METHOD("Left", "s", String_Left, "(String)s[(Length)i]"),
|
|
|
|
GB_STATIC_FAST_METHOD("Left$", "s", String_Left, "(String)s[(Length)i]"),
|
|
|
|
GB_STATIC_FAST_METHOD("Right", "s", String_Right, "(String)s[(Length)i]"),
|
|
|
|
GB_STATIC_FAST_METHOD("Right$", "s", String_Right, "(String)s[(Length)i]"),
|
2007-12-30 17:41:49 +01:00
|
|
|
|
2011-09-08 18:01:36 +02:00
|
|
|
GB_STATIC_METHOD("Upper", "s", String_Upper, "(String)s"),
|
|
|
|
GB_STATIC_METHOD("Upper$", "s", String_Upper, "(String)s"),
|
|
|
|
GB_STATIC_METHOD("UCase", "s", String_Upper, "(String)s"),
|
|
|
|
GB_STATIC_METHOD("UCase$", "s", String_Upper, "(String)s"),
|
|
|
|
GB_STATIC_METHOD("Lower", "s", String_Lower, "(String)s"),
|
|
|
|
GB_STATIC_METHOD("Lower$", "s", String_Lower, "(String)s"),
|
|
|
|
GB_STATIC_METHOD("LCase", "s", String_Lower, "(String)s"),
|
|
|
|
GB_STATIC_METHOD("LCase$", "s", String_Lower, "(String)s"),
|
2007-12-30 17:41:49 +01:00
|
|
|
|
2011-09-08 18:01:36 +02:00
|
|
|
GB_STATIC_METHOD("InStr", "i", String_Instr, "(String)s(Pattern)s[(From)i(Mode)i]"),
|
|
|
|
GB_STATIC_METHOD("RInStr", "i", String_RInstr, "(String)s(Pattern)s[(From)i(Mode)i]"),
|
2007-12-30 17:41:49 +01:00
|
|
|
|
2011-09-08 18:01:36 +02:00
|
|
|
GB_STATIC_METHOD("Comp", "i", String_Comp, "(String)s(String2)s[(Mode)i]"),
|
2007-12-30 17:41:49 +01:00
|
|
|
|
2011-09-08 18:01:36 +02:00
|
|
|
GB_STATIC_METHOD("Byte", "i", String_Pos, "(String)s(Index)i"),
|
|
|
|
GB_STATIC_METHOD("Pos", "i", String_Pos, "(String)s(Index)i"),
|
|
|
|
GB_STATIC_METHOD("Index", "i", String_Index, "(String)s(Byte)i"),
|
2007-12-30 17:41:49 +01:00
|
|
|
|
2011-09-08 18:01:36 +02:00
|
|
|
GB_STATIC_METHOD("Chr", "s", String_Chr, "(Unicode)i"),
|
|
|
|
GB_STATIC_METHOD("Chr$", "s", String_Chr, "(Unicode)i"),
|
|
|
|
GB_STATIC_METHOD("Code", "i", String_Code, "(String)s[(Index)i]"),
|
2007-12-30 17:41:49 +01:00
|
|
|
|
2011-09-08 18:01:36 +02:00
|
|
|
GB_STATIC_METHOD("IsValid", "b", String_IsValid, "(String)s"),
|
2007-12-30 17:41:49 +01:00
|
|
|
|
2011-09-08 18:01:36 +02:00
|
|
|
GB_END_DECLARE
|
2007-12-30 17:41:49 +01:00
|
|
|
};
|