[GB.DATA]

* BUG: The 256 values of an input byte were mapped to just 128 bits of the
  trie children mask, so corruption could have happened with non-ASCII keys.
* BUG: Add a special case when using GB.AddString() on an empty key
* BUG: Fix a memory error from Trie.Complete()'s return value
* OPT: Use the Gambas API in trie.c to manage memory and thus remove error
  recovery since the interpreter will crash anyway
* OPT: Remove checks for "unsupported" characters as all 256 values of a
  byte are supported in this version of the code



git-svn-id: svn://localhost/gambas/trunk@6515 867c0c6c-44f3-4631-809d-bfa615b0a4ec
This commit is contained in:
Tobias Boege 2014-09-28 20:01:52 +00:00
parent 35834a61ce
commit 2d81f8dff0
3 changed files with 79 additions and 121 deletions

View file

@ -96,23 +96,15 @@ BEGIN_METHOD(Trie_put, GB_VARIANT value; GB_STRING key)
GB_VARIANT_VALUE *val;
if (VARG(value).type == GB_T_NULL) {
if (trie_remove(THIS->root, STRING(key), LENGTH(key),
value_dtor)) {
GB.Error(ERR_OOM);
return;
}
trie_remove(THIS->root, STRING(key), LENGTH(key),
value_dtor);
UPDATE_TIME();
return;
}
GB.Alloc((void **) &val, sizeof(*val));
val->type = GB_T_NULL;
GB.StoreVariant(ARG(value), val);
if (trie_insert(THIS->root, STRING(key), LENGTH(key), val)) {
GB.StoreVariant(NULL, val);
GB.Free((void **) &val);
GB.Error(ERR_OOM);
return;
}
trie_insert(THIS->root, STRING(key), LENGTH(key), val);
UPDATE_TIME();
END_METHOD
@ -175,8 +167,13 @@ next:
node = top->node->children[top->idx];
visit:
if (!top->visited) {
THIS->key = GB.AddString(THIS->key, top->node->key,
top->node->len);
/* AddString() will take the root node's len == 0 as a
* request to use strlen(). Make that a special case. */
if (top->node->len) {
THIS->key = GB.AddString(THIS->key,
top->node->key,
top->node->len);
}
} else {
struct stack *old = top;
@ -270,11 +267,14 @@ BEGIN_METHOD(Trie_Complete, GB_STRING prefix)
return;
}
s = GB.TempString(STRING(prefix), LENGTH(prefix));
s = GB.NewString(STRING(prefix), LENGTH(prefix));
/* Again, we need to special-case p.node->len - p.i == 0. */
if (p.node->len - p.i)
s = GB.AddString(s, p.node->key + p.i, p.node->len - p.i);
GB.ReturnString(s);
GB.ReturnBorrow();
GB.FreeString(&s);
GB.ReturnRelease();
END_METHOD

View file

@ -24,11 +24,11 @@
#include "trie.h"
#include "c_trie.h"
/**
* __key_index() - Return a unique number for the character
* @c: char
*
* If `c' is not a trie key character, this function returns -1.
*/
static inline int __key_index(char c)
{
@ -54,15 +54,14 @@ static inline int popcnt(uint64_t word)
* corresponding to a key character
* @node: struct trie
* @c: the character
*
* This returns -1 on invalid character.
*/
static inline int __key_to_array_index(const struct trie *node, char c)
{
int i = __key_index(c), n;
int i = __key_index(c), j, n;
n = i >= MASK_SIZE ? popcnt(node->mask[0]) : 0;
n += popcnt(node->mask[INDEX(i)] & ((1UL << OFFSET(i)) - 1));
for (j = n = 0; i >= MASK_SIZE; j++, i -= MASK_SIZE)
n += popcnt(node->mask[j]);
n += popcnt(node->mask[j] & ((1UL << i) - 1));
return n;
}
@ -112,7 +111,7 @@ static inline struct trie *get_continuation(const struct trie *node, char c)
int i = __key_index(c);
int j = __key_to_array_index(node, c);
if (i == -1 || !test_bit(node, i))
if (!test_bit(node, i))
return NULL;
return node->children[j];
}
@ -213,9 +212,7 @@ static struct trie *new_node(const char *key, size_t len, void *value)
/*if (!len)
len = strlen(key);*/
trie = malloc(sizeof(*trie) + len);
if (!trie)
return NULL;
GB.Alloc((void **) &trie, sizeof(*trie) + len);
memset(trie->mask, 0, sizeof(trie->mask));
trie->children = NULL;
trie->nchildren = 0;
@ -240,10 +237,10 @@ struct trie *new_trie(void)
*/
static void destroy_node(struct trie *node, void (*dtor)(void *))
{
free(node->children);
GB.Free((void **) &node->children);
if (node->value && dtor)
dtor(node->value);
free(node);
GB.Free((void **) &node);
}
/**
@ -272,7 +269,7 @@ void clear_trie(struct trie *trie, void (*dtor)(void *))
for (i = 0; i < trie->nchildren; i++)
destroy_trie(trie->children[i], dtor);
memset(trie->mask, 0, sizeof(trie->mask));
free(trie->children);
GB.Free((void **) &trie->children);
trie->children = NULL;
trie->nchildren = 0;
if (trie->value)
@ -288,13 +285,12 @@ void clear_trie(struct trie *trie, void (*dtor)(void *))
* @child2: struct trie, may be NULL
*
* This function writes the apropriate mask for the array into the `mask'
* argument. If one of the keys was found to contain illegal characters,
* nothing is written and -1 is returned.
* argument.
*
* The `child2' can be NULL in which case it is ignored and not assigned to
* the array.
*/
static inline int __sort_two_children(const struct trie *array[2],
static inline void __sort_two_children(const struct trie *array[2],
uint64_t mask[4],
const struct trie *child1,
const struct trie *child2)
@ -303,8 +299,6 @@ static inline int __sort_two_children(const struct trie *array[2],
i = __key_index(*child1->key);
j = child2 ? __key_index(*child2->key) : 0; /* just to initialise */
if (i == -1 || j == -1)
return -1;
if (!child2 || i < j) {
array[0] = child1;
if (child2)
@ -316,7 +310,6 @@ static inline int __sort_two_children(const struct trie *array[2],
__set_bit(mask, i);
if (child2)
__set_bit(mask, j);
return 0;
}
/**
@ -326,8 +319,8 @@ static inline int __sort_two_children(const struct trie *array[2],
* @len: length
* @value: the value
*/
static int __trie_insert_split(struct __trie_find_res *res, const char *key,
size_t len, void *value)
static void __trie_insert_split(struct __trie_find_res *res, const char *key,
size_t len, void *value)
{
struct trie *node = res->node, *bottom, *branch = NULL;
struct trie **topchildren;
@ -346,24 +339,16 @@ static int __trie_insert_split(struct __trie_find_res *res, const char *key,
*/
bottom = new_node(&node->key[res->i], node->len - res->i,
node->value);
if (!bottom)
return -1;
if (have_branch) {
branch = new_node(&key[res->j], len - res->j, value);
if (!branch)
goto error_free;
topchildren = malloc(2 * sizeof(*topchildren));
GB.Alloc((void **) &topchildren, 2 * sizeof(*topchildren));
} else {
topchildren = malloc(sizeof(*topchildren));
GB.Alloc((void **) &topchildren, sizeof(*topchildren));
}
if (!topchildren)
goto error_free2;
/* While doing the malloc() stuff, we can already realloc() the
/* While doing the Alloc() stuff, we can already Realloc() the
* "top" node here... */
node = realloc(node, sizeof(*node) + res->j);
if (!node)
goto error_free3;
GB.Realloc((void **) &node, sizeof(*node) + res->j);
/* Link the split node into the trie again */
int i = __key_to_array_index(res->parent, *node->key);
@ -376,7 +361,7 @@ static int __trie_insert_split(struct __trie_find_res *res, const char *key,
* After we have copied them from the "top" node, we can set the
* members there correctly to: ->mask, ->children, ->nchildren,
* ->value and ->len need tweaking while ->key was cut properly by
* realloc().
* Realloc().
*/
memcpy(bottom->mask, node->mask, sizeof(bottom->mask));
bottom->children = node->children;
@ -386,9 +371,8 @@ static int __trie_insert_split(struct __trie_find_res *res, const char *key,
* __sort_two_children() is aware that `branch' may be NULL.
*/
memset(node->mask, 0, sizeof(node->mask));
if (__sort_two_children((const struct trie **) topchildren,
node->mask, bottom, branch) == -1)
goto error_free3;
__sort_two_children((const struct trie **) topchildren,
node->mask, bottom, branch);
node->children = topchildren;
node->nchildren = have_branch ? 2 : 1;
node->value = NULL;
@ -401,16 +385,6 @@ static int __trie_insert_split(struct __trie_find_res *res, const char *key,
*/
if (!have_branch)
node->value = value;
return 0;
error_free3:
free(topchildren);
error_free2:
destroy_node(branch, NULL);
error_free:
destroy_node(bottom, NULL);
return -1;
}
/**
@ -420,8 +394,8 @@ error_free:
* @len: length
* @value: the value
*/
static int __trie_insert_child(struct __trie_find_res *res, const char *key,
size_t len, void *value)
static void __trie_insert_child(struct __trie_find_res *res, const char *key,
size_t len, void *value)
{
struct trie *node = res->parent, *child, **children;
int i, j, k;
@ -436,28 +410,18 @@ static int __trie_insert_child(struct __trie_find_res *res, const char *key,
*/
child = new_node(&key[res->j], len - res->j, value);
if (!child)
return -1;
i = __key_index(*child->key);
j = __key_to_array_index(node, *child->key);
if (i == -1 || j == -1)
goto error_free;
children = realloc(node->children, (node->nchildren + 1) *
sizeof(*children));
if (!children)
goto error_free;
children = node->children;
GB.Realloc((void **) &children, (node->nchildren + 1) *
sizeof(*children));
for (k = node->nchildren; k > j; k--)
children[k] = children[k - 1];
children[k] = child;
node->children = children;
node->nchildren++;
set_bit(node, i);
return 0;
error_free:
destroy_node(child, NULL);
return -1;
}
/**
@ -471,25 +435,28 @@ error_free:
* node. Note that the NULL pointer is an invalid `value' and is used to
* detect value-less nodes, so don't use it!
*/
int trie_insert(struct trie *trie, const char *key, size_t len, void *value)
void trie_insert(struct trie *trie, const char *key, size_t len, void *value)
{
struct __trie_find_res res = __trie_find(trie, key, len);
if (res.node) {
if (__is_exact(&res, len)) {
res.node->value = value;
return 0;
return;
}
return __trie_insert_split(&res, key, len, value);
__trie_insert_split(&res, key, len, value);
} else {
return __trie_insert_child(&res, key, len, value);
}
return __trie_insert_child(&res, key, len, value);
}
/**
* __trie_remove_leaf() - Remove a leaf node
* @res: struct __trie_find_res
* @res: struct __trie_find_res
* @dtor: value destructor
*/
static int __trie_remove_leaf(struct __trie_find_res *res)
static void __trie_remove_leaf(struct __trie_find_res *res,
void (*dtor)(void *))
{
struct trie *node = res->node, *parent = res->parent;
int i, j, k;
@ -521,7 +488,7 @@ static int __trie_remove_leaf(struct __trie_find_res *res)
* a value).
*/
if (parent->nchildren == 1) { /* i) */
free(parent->children);
GB.Free((void **) &parent->children);
parent->children = NULL;
parent->nchildren = 0;
/* a) */
@ -536,54 +503,46 @@ static int __trie_remove_leaf(struct __trie_find_res *res)
other = parent->children[1];
else
other = parent->children[0];
parent = realloc(parent, sizeof(*parent) + parent->len
+ other->len);
if (!parent)
return -1;
GB.Realloc((void **) &parent, sizeof(*parent) + parent->len
+ other->len);
memcpy(parent->key + parent->len, other->key, other->len);
parent->len += other->len;
/* does a) */
memcpy(parent->mask, other->mask, sizeof(parent->mask));
free(parent->children);
GB.Free((void **) &parent->children);
parent->children = other->children;
parent->nchildren = other->nchildren;
parent->value = other->value;
/* Do NOT destroy_node() as we copied its ->children! */
free(other);
GB.Free((void **) &other);
} else { /* iii) */
struct trie **tmp;
/* does a) */
for (k = j + 1; k < parent->nchildren; k++)
parent->children[k - 1] = parent->children[k];
parent->nchildren--;
tmp = parent->children;
parent->children = realloc(tmp, parent->nchildren *
sizeof(*parent->children));
if (!parent->children) {
parent->children = tmp;
return -1;
}
GB.Realloc((void **) &parent->children, parent->nchildren *
sizeof(*parent->children));
clear_bit(parent, i);
}
destroy_node(node, NULL); /* The value is destroyed in the caller */
return 0;
destroy_node(node, dtor);
}
/**
* __trie_remove_interior() - Remove an interior node
* @res: struct __trie_find_res
* @res: struct __trie_find_res
* @dtor: value destructor
*/
static int __trie_remove_interior(struct __trie_find_res *res)
static void __trie_remove_interior(struct __trie_find_res *res,
void (*dtor)(void *))
{
/*
* Let's see: an interior node can only have 2 or more children, so
* we cannot possibly do any compression of the nodes. We just erase
* the value and leave the trie structure as-is.
*/
dtor(res->node->value);
res->node->value = NULL;
return 0;
}
/**
@ -592,12 +551,9 @@ static int __trie_remove_interior(struct __trie_find_res *res)
* @key: the key
* @len: length
* @dtor: value destructor
*
* This function may fail due to memory allocation issues or if the key is
* empty (for thou shalt not dare remove the root node).
*/
int trie_remove(struct trie *trie, const char *key, size_t len,
void (*dtor)(void *))
void trie_remove(struct trie *trie, const char *key, size_t len,
void (*dtor)(void *))
{
struct __trie_find_res res;
struct trie *node;
@ -609,15 +565,17 @@ int trie_remove(struct trie *trie, const char *key, size_t len,
* Delete a value from the root anyways.
*/
if (!node || !__is_exact(&res, len) || !node->value)
return 0;
dtor(node->value);
node->value = NULL;
if (node == trie)
return 0;
return;
if (node == trie) {
dtor(node->value);
node->value = NULL;
return;
}
if (!node->children)
return __trie_remove_leaf(&res);
return __trie_remove_interior(&res);
__trie_remove_leaf(&res, dtor);
else
__trie_remove_interior(&res, dtor);
}
/**

View file

@ -50,10 +50,10 @@ extern struct trie *new_trie(void);
extern void destroy_trie(struct trie *trie, void (*dtor)(void *));
extern void clear_trie(struct trie *trie, void (*dtor)(void *));
extern int trie_insert(struct trie *trie, const char *key, size_t len,
void *value);
extern int trie_remove(struct trie *trie, const char *key, size_t len,
void (*dtor)(void *));
extern void trie_insert(struct trie *trie, const char *key, size_t len,
void *value);
extern void trie_remove(struct trie *trie, const char *key, size_t len,
void (*dtor)(void *));
extern struct trie *trie_find(const struct trie *trie, const char *key,
size_t len);