2014-09-23 21:15:25 +02:00
|
|
|
/*
|
|
|
|
* trie.c
|
|
|
|
*
|
|
|
|
* Copyright (C) 2014 Tobias Boege <tobias@gambas-buch.de>
|
|
|
|
*
|
|
|
|
* This program is free software; you can redistribute it and/or modify
|
|
|
|
* it under the terms of the GNU General Public License as published by
|
|
|
|
* the Free Software Foundation; either version 2, or (at your option)
|
|
|
|
* any later version.
|
|
|
|
*
|
|
|
|
* This program is distributed in the hope that it will be useful,
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
* GNU General Public License for more details.
|
|
|
|
*
|
|
|
|
* You should have received a copy of the GNU General Public License
|
|
|
|
* along with this program; if not, write to the Free Software
|
|
|
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
|
|
|
* MA 02110-1301, USA.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <stdlib.h>
|
|
|
|
#include <string.h>
|
|
|
|
|
|
|
|
#include "trie.h"
|
|
|
|
|
2014-09-28 22:01:52 +02:00
|
|
|
#include "c_trie.h"
|
|
|
|
|
2014-09-23 21:15:25 +02:00
|
|
|
/**
 * __key_index() - Map a key character to its bit number
 * @c: key character
 *
 * The character is reinterpreted as an unsigned char, so the result is
 * never negative, whatever the signedness of plain char is.
 */
static inline int __key_index(char c)
{
	unsigned char byte = (unsigned char) c;

	return byte;
}
|
|
|
|
|
|
|
|
/**
 * popcnt() - Count the bits set in a 64 bit word
 * @word: the word
 *
 * Each round clears the lowest set bit, so the loop runs once per set bit.
 */
static inline int popcnt(uint64_t word)
{
	int bits = 0;

	while (word) {
		word &= word - 1;
		bits++;
	}
	return bits;
}
|
|
|
|
|
|
|
|
/* Number of bits in one element of a node's ->mask array. */
#define MASK_SIZE \
	(__CHAR_BIT__ * sizeof(((struct trie *) 0)->mask[0]))
/*
 * Element of ->mask holding bit `i', and the position of that bit inside
 * the element. The argument is parenthesised so that expressions such as
 * INDEX(a + b) expand with the intended precedence.
 */
#define INDEX(i)	((i) / MASK_SIZE)
#define OFFSET(i)	((i) % MASK_SIZE)
|
|
|
|
|
|
|
|
/**
|
|
|
|
* __key_to_array_index() - Return array index over a node's ->children
|
|
|
|
* corresponding to a key character
|
|
|
|
* @node: struct trie
|
|
|
|
* @c: the character
|
|
|
|
*/
|
|
|
|
static inline int __key_to_array_index(const struct trie *node, char c)
|
|
|
|
{
|
2014-09-28 22:01:52 +02:00
|
|
|
int i = __key_index(c), j, n;
|
2014-09-23 21:15:25 +02:00
|
|
|
|
2014-09-28 22:01:52 +02:00
|
|
|
for (j = n = 0; i >= MASK_SIZE; j++, i -= MASK_SIZE)
|
|
|
|
n += popcnt(node->mask[j]);
|
2014-11-29 19:55:41 +01:00
|
|
|
n += popcnt(node->mask[j] & ((1ULL << i) - 1));
|
2014-09-23 21:15:25 +02:00
|
|
|
return n;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
 * __set_bit() - Set a bit in a raw mask array
 * @mask: the mask words
 * @i:    bit number
 */
static inline void __set_bit(uint64_t mask[4], int i)
{
	const uint64_t flag = 1ULL << OFFSET(i);

	mask[INDEX(i)] |= flag;
}
|
|
|
|
|
|
|
|
static inline void set_bit(struct trie *node, int i)
|
|
|
|
{
|
|
|
|
__set_bit(node->mask, i);
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline void clear_bit(struct trie *node, int i)
|
|
|
|
{
|
2014-11-29 19:55:41 +01:00
|
|
|
node->mask[INDEX(i)] &= ~(1ULL << (OFFSET(i)));
|
2014-09-23 21:15:25 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
static inline int test_bit(const struct trie *node, int i)
|
|
|
|
{
|
2014-11-29 19:55:41 +01:00
|
|
|
return !!(node->mask[INDEX(i)] & (1ULL << (OFFSET(i))));
|
2014-09-23 21:15:25 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* get_continuation() - Return the node continuing the key of another node
|
|
|
|
* with a given character
|
|
|
|
* @node: struct trie
|
|
|
|
* @c: the character
|
|
|
|
*
|
|
|
|
* If you have a trie like
|
|
|
|
*
|
|
|
|
* 0
|
|
|
|
* |
|
|
|
|
* te
|
|
|
|
* |
|
|
|
|
* +--+--+
|
|
|
|
* | |
|
|
|
|
* st rm
|
|
|
|
*
|
|
|
|
* and search for the key "term", this function comes in handy at the node
|
|
|
|
* "te". You will call get_continuation(te_node, 'r') which yields rm_node.
|
|
|
|
*
|
|
|
|
* If no such continuation exists, NULL is returned.
|
|
|
|
*/
|
|
|
|
static inline struct trie *get_continuation(const struct trie *node, char c)
|
|
|
|
{
|
|
|
|
int i = __key_index(c);
|
|
|
|
int j = __key_to_array_index(node, c);
|
|
|
|
|
2014-09-28 22:01:52 +02:00
|
|
|
if (!test_bit(node, i))
|
2014-09-23 21:15:25 +02:00
|
|
|
return NULL;
|
|
|
|
return node->children[j];
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Result of __trie_find(). The member order matters: the structure is
 * filled in positionally.
 */
struct __trie_find_res {
	struct trie *node;	/* node where the search stopped, or NULL */
	struct trie *parent;	/* last node that was matched completely */
	int i;			/* characters matched inside `node' */
	int j;			/* characters consumed from the key */
};
|
|
|
|
|
|
|
|
/**
|
|
|
|
* __trie_find() - Get the node containing a key
|
|
|
|
* @trie: struct trie
|
|
|
|
* @key: the key
|
|
|
|
* @len: length
|
|
|
|
*
|
|
|
|
* This function returns the node in which `key' ends which may NOT be the
|
|
|
|
* node which has exactly the key `key'. It returns NULL, if no such node
|
|
|
|
* was found.
|
|
|
|
*/
|
|
|
|
static struct __trie_find_res __trie_find(const struct trie *trie,
|
|
|
|
const char *key, size_t len)
|
|
|
|
{
|
|
|
|
struct trie *node = (struct trie *) trie, *parent = NULL;
|
|
|
|
int i = 0, j = 0;
|
|
|
|
|
|
|
|
while (node) {
|
|
|
|
i = 0;
|
|
|
|
while (i < node->len && j < len && node->key[i] == key[j]) {
|
|
|
|
i++;
|
|
|
|
j++;
|
|
|
|
}
|
|
|
|
/*
|
|
|
|
* Four cases:
|
|
|
|
* 1) the `key' and `node' were entirely consumed: perfect
|
|
|
|
* match. Get out.
|
|
|
|
* 2) only `key' consumed: we're done as the key lies
|
|
|
|
* within the node.
|
|
|
|
* 3) only `node' was consumed: recurse to its children.
|
|
|
|
* 4) if neither of the above, node and key deverged here,
|
|
|
|
* so break the loop since this is as close as we can
|
|
|
|
* get.
|
|
|
|
*/
|
|
|
|
if (j == len) {
|
|
|
|
break;
|
|
|
|
} else if (i == node->len) {
|
|
|
|
parent = node;
|
|
|
|
node = get_continuation(node, key[j]);
|
|
|
|
} else {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return (struct __trie_find_res) {node, parent, i, j};
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* __is_exact() - Return whether a found node matches a key exactly
|
|
|
|
* @res: struct __trie_find_res
|
|
|
|
* @len: length
|
|
|
|
*/
|
|
|
|
static inline int __is_exact(struct __trie_find_res *res, size_t len)
|
|
|
|
{
|
|
|
|
return res->i == res->node->len && res->j == len;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* __trie_find_exact() - Get the node with ends in a key
|
|
|
|
* @trie: struct trie
|
|
|
|
* @key: the key
|
|
|
|
* @len: length
|
|
|
|
*
|
|
|
|
* Unlike __trie_find(), this function returns the node which has the key
|
|
|
|
* `key' or NULL if none. Note, however, that this is not a guarantee that
|
|
|
|
* the node contains a non-NULL ->value.
|
|
|
|
*/
|
|
|
|
static struct trie *__trie_find_exact(const struct trie *trie,
|
|
|
|
const char *key, size_t len)
|
|
|
|
{
|
|
|
|
struct __trie_find_res res = __trie_find(trie, key, len);
|
|
|
|
|
|
|
|
return (res.node && __is_exact(&res, len)) ? res.node : NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* __new_node() - Allocate a trie node
|
|
|
|
* @key: part of the key
|
|
|
|
* @len: length of `key'
|
|
|
|
* @value: payload
|
|
|
|
*
|
|
|
|
* If `len' equals zero, the length is obtained from `key' via strlen(). For
|
|
|
|
* the single case where a zero length is correct, this doesn't do much
|
|
|
|
* harm - as opposed to the strangeness of comparing a size_t to -1 or some
|
|
|
|
* other clearly invalid value.
|
|
|
|
*/
|
|
|
|
static struct trie *new_node(const char *key, size_t len, void *value)
|
|
|
|
{
|
|
|
|
struct trie *trie;
|
|
|
|
|
|
|
|
/*if (!len)
|
|
|
|
len = strlen(key);*/
|
|
|
|
|
2014-09-28 22:01:52 +02:00
|
|
|
GB.Alloc((void **) &trie, sizeof(*trie) + len);
|
2014-09-23 21:15:25 +02:00
|
|
|
memset(trie->mask, 0, sizeof(trie->mask));
|
|
|
|
trie->children = NULL;
|
|
|
|
trie->nchildren = 0;
|
|
|
|
trie->value = value;
|
|
|
|
trie->len = len;
|
|
|
|
memcpy(trie->key, key, len);
|
|
|
|
return trie;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
 * new_trie() - Allocate a new trie
 *
 * The trie consists only of a root node, which has an empty key and no
 * value.
 */
struct trie *new_trie(void)
{
	struct trie *root = new_node("", 0, NULL);

	return root;
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* destroy_node() - Deallocate a single node
|
|
|
|
* @node: struct trie
|
|
|
|
* @dtor: value destructor
|
|
|
|
*/
|
|
|
|
static void destroy_node(struct trie *node, void (*dtor)(void *))
|
|
|
|
{
|
2014-09-28 22:01:52 +02:00
|
|
|
GB.Free((void **) &node->children);
|
2014-09-23 21:15:25 +02:00
|
|
|
if (node->value && dtor)
|
|
|
|
dtor(node->value);
|
2014-09-28 22:01:52 +02:00
|
|
|
GB.Free((void **) &node);
|
2014-09-23 21:15:25 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* destroy_trie() - Deallocate an entire trie
|
|
|
|
* @trie: struct trie
|
|
|
|
* @dtor: value destructor
|
|
|
|
*/
|
|
|
|
void destroy_trie(struct trie *trie, void (*dtor)(void *))
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
|
|
|
for (i = 0; i < trie->nchildren; i++)
|
|
|
|
destroy_trie(trie->children[i], dtor);
|
|
|
|
destroy_node(trie, dtor);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* clear_trie() - Remove all but the root
|
|
|
|
* @trie: struct trie
|
|
|
|
* @dtor: value destructor
|
|
|
|
*/
|
|
|
|
void clear_trie(struct trie *trie, void (*dtor)(void *))
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
|
|
|
for (i = 0; i < trie->nchildren; i++)
|
|
|
|
destroy_trie(trie->children[i], dtor);
|
|
|
|
memset(trie->mask, 0, sizeof(trie->mask));
|
2014-09-28 22:01:52 +02:00
|
|
|
GB.Free((void **) &trie->children);
|
2014-09-23 21:15:25 +02:00
|
|
|
trie->children = NULL;
|
|
|
|
trie->nchildren = 0;
|
|
|
|
if (trie->value)
|
|
|
|
dtor(trie->value);
|
|
|
|
trie->value = NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* __sort_two_children() - Sort (at most) two children into a children array
|
|
|
|
* @array: array with enough space for the element(s)
|
|
|
|
* @mask: buffer
|
|
|
|
* @child1: struct trie
|
|
|
|
* @child2: struct trie, may be NULL
|
|
|
|
*
|
|
|
|
* This function writes the apropriate mask for the array into the `mask'
|
2014-09-28 22:01:52 +02:00
|
|
|
* argument.
|
2014-09-23 21:15:25 +02:00
|
|
|
*
|
|
|
|
* The `child2' can be NULL in which case it is ignored and not assigned to
|
|
|
|
* the array.
|
|
|
|
*/
|
2014-09-28 22:01:52 +02:00
|
|
|
static inline void __sort_two_children(const struct trie *array[2],
|
2014-09-23 21:15:25 +02:00
|
|
|
uint64_t mask[4],
|
|
|
|
const struct trie *child1,
|
|
|
|
const struct trie *child2)
|
|
|
|
{
|
|
|
|
int i, j;
|
|
|
|
|
|
|
|
i = __key_index(*child1->key);
|
|
|
|
j = child2 ? __key_index(*child2->key) : 0; /* just to initialise */
|
|
|
|
if (!child2 || i < j) {
|
|
|
|
array[0] = child1;
|
|
|
|
if (child2)
|
|
|
|
array[1] = child2;
|
|
|
|
} else {
|
|
|
|
array[0] = child2;
|
|
|
|
array[1] = child1;
|
|
|
|
}
|
|
|
|
__set_bit(mask, i);
|
|
|
|
if (child2)
|
|
|
|
__set_bit(mask, j);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* __trie_insert_split() - Split a node to insert a new key
|
|
|
|
* @res: struct __trie_find_res
|
|
|
|
* @key: the key
|
|
|
|
* @len: length
|
|
|
|
* @value: the value
|
|
|
|
*/
|
2014-09-28 22:01:52 +02:00
|
|
|
static void __trie_insert_split(struct __trie_find_res *res, const char *key,
|
|
|
|
size_t len, void *value)
|
2014-09-23 21:15:25 +02:00
|
|
|
{
|
|
|
|
struct trie *node = res->node, *bottom, *branch = NULL;
|
|
|
|
struct trie **topchildren;
|
|
|
|
/*
|
|
|
|
* If key[res->j] == '\0', the key lies within `node' and will be in
|
|
|
|
* the "top" node already, so we save the `branch'.
|
|
|
|
*/
|
|
|
|
int have_branch = !!key[res->j];
|
|
|
|
|
|
|
|
/*
|
|
|
|
* - `bottom' will contain the bottom part of the split node;
|
|
|
|
* - `branch' will be the new node associated with the wanted key
|
|
|
|
* (if it is not within the `node')
|
|
|
|
* - `topchildren' is the new ->children array of the "top" half of
|
|
|
|
* the split node - which will consist of `bottom' and `branch'.
|
|
|
|
*/
|
|
|
|
bottom = new_node(&node->key[res->i], node->len - res->i,
|
|
|
|
node->value);
|
|
|
|
|
|
|
|
if (have_branch) {
|
|
|
|
branch = new_node(&key[res->j], len - res->j, value);
|
2014-09-28 22:01:52 +02:00
|
|
|
GB.Alloc((void **) &topchildren, 2 * sizeof(*topchildren));
|
2014-09-23 21:15:25 +02:00
|
|
|
} else {
|
2014-09-28 22:01:52 +02:00
|
|
|
GB.Alloc((void **) &topchildren, sizeof(*topchildren));
|
2014-09-23 21:15:25 +02:00
|
|
|
}
|
2014-09-28 22:01:52 +02:00
|
|
|
/* While doing the Alloc() stuff, we can already Realloc() the
|
2014-09-23 21:15:25 +02:00
|
|
|
* "top" node here... */
|
2014-09-28 22:01:52 +02:00
|
|
|
GB.Realloc((void **) &node, sizeof(*node) + res->j);
|
2014-09-23 21:15:25 +02:00
|
|
|
/* Link the split node into the trie again */
|
|
|
|
int i = __key_to_array_index(res->parent, *node->key);
|
|
|
|
|
|
|
|
res->parent->children[i] = node;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* new_node() set `bottom' already up quite well. However, we need
|
|
|
|
* to tweak: ->mask, ->children and ->nchildren.
|
|
|
|
*
|
|
|
|
* After we have copied them from the "top" node, we can set the
|
|
|
|
* members there correctly to: ->mask, ->children, ->nchildren,
|
|
|
|
* ->value and ->len need tweaking while ->key was cut properly by
|
2014-09-28 22:01:52 +02:00
|
|
|
* Realloc().
|
2014-09-23 21:15:25 +02:00
|
|
|
*/
|
|
|
|
memcpy(bottom->mask, node->mask, sizeof(bottom->mask));
|
|
|
|
bottom->children = node->children;
|
|
|
|
bottom->nchildren = node->nchildren;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* __sort_two_children() is aware that `branch' may be NULL.
|
|
|
|
*/
|
|
|
|
memset(node->mask, 0, sizeof(node->mask));
|
2014-09-28 22:01:52 +02:00
|
|
|
__sort_two_children((const struct trie **) topchildren,
|
|
|
|
node->mask, bottom, branch);
|
2014-09-23 21:15:25 +02:00
|
|
|
node->children = topchildren;
|
|
|
|
node->nchildren = have_branch ? 2 : 1;
|
|
|
|
node->value = NULL;
|
|
|
|
node->len = res->i;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* The new `branch' has everything right if it exists as it has no
|
|
|
|
* children which need extra care. If it wasn't created, we need to
|
|
|
|
* assign the `value' to the "top" node now.
|
|
|
|
*/
|
|
|
|
if (!have_branch)
|
|
|
|
node->value = value;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* __trie_insert_child() - Extend an already existing key to a new one
|
|
|
|
* @res: struct __trie_find_res
|
|
|
|
* @key: the key
|
|
|
|
* @len: length
|
|
|
|
* @value: the value
|
|
|
|
*/
|
2014-09-28 22:01:52 +02:00
|
|
|
static void __trie_insert_child(struct __trie_find_res *res, const char *key,
|
|
|
|
size_t len, void *value)
|
2014-09-23 21:15:25 +02:00
|
|
|
{
|
|
|
|
struct trie *node = res->parent, *child, **children;
|
|
|
|
int i, j, k;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Here, we CAN'T have the case that `node' has no children and no
|
|
|
|
* value so that we could concatenate the keys. This just cannot
|
|
|
|
* happen while adding nodes: then we would have added a leaf node
|
|
|
|
* without a value which doesn't happen. The case above can only
|
|
|
|
* occur when children are *removed* from an interior parent without
|
|
|
|
* a value and is thus handled in the trie_remove() function.
|
|
|
|
*/
|
|
|
|
|
|
|
|
child = new_node(&key[res->j], len - res->j, value);
|
|
|
|
i = __key_index(*child->key);
|
|
|
|
j = __key_to_array_index(node, *child->key);
|
2014-09-28 22:01:52 +02:00
|
|
|
children = node->children;
|
|
|
|
GB.Realloc((void **) &children, (node->nchildren + 1) *
|
|
|
|
sizeof(*children));
|
|
|
|
|
2014-09-23 21:15:25 +02:00
|
|
|
for (k = node->nchildren; k > j; k--)
|
|
|
|
children[k] = children[k - 1];
|
|
|
|
children[k] = child;
|
|
|
|
node->children = children;
|
|
|
|
node->nchildren++;
|
|
|
|
set_bit(node, i);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* trie_insert() - Associate a value with a key in the trie
|
|
|
|
* @trie: struct trie
|
|
|
|
* @key: the key
|
|
|
|
* @len: length
|
|
|
|
* @value: the value
|
|
|
|
*
|
|
|
|
* You can use the empty string as `key' to save data in the trie's root
|
|
|
|
* node. Note that the NULL pointer is an invalid `value' and is used to
|
|
|
|
* detect value-less nodes, so don't use it!
|
2014-12-01 23:39:28 +01:00
|
|
|
*
|
|
|
|
* If a value was replaced, the old value is returned.
|
2014-09-23 21:15:25 +02:00
|
|
|
*/
|
2014-12-01 23:39:28 +01:00
|
|
|
void *trie_insert(struct trie *trie, const char *key, size_t len, void *value)
|
2014-09-23 21:15:25 +02:00
|
|
|
{
|
|
|
|
struct __trie_find_res res = __trie_find(trie, key, len);
|
|
|
|
|
|
|
|
if (res.node) {
|
|
|
|
if (__is_exact(&res, len)) {
|
2014-12-01 23:39:28 +01:00
|
|
|
void *last = res.node->value;
|
|
|
|
|
2014-09-23 21:15:25 +02:00
|
|
|
res.node->value = value;
|
2014-12-01 23:39:28 +01:00
|
|
|
return last;
|
2014-09-23 21:15:25 +02:00
|
|
|
}
|
2014-09-28 22:01:52 +02:00
|
|
|
__trie_insert_split(&res, key, len, value);
|
|
|
|
} else {
|
2014-12-01 23:39:28 +01:00
|
|
|
__trie_insert_child(&res, key, len, value);
|
2014-09-23 21:15:25 +02:00
|
|
|
}
|
2014-12-01 23:39:28 +01:00
|
|
|
return NULL;
|
2014-09-23 21:15:25 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* __trie_remove_leaf() - Remove a leaf node
|
2014-09-28 22:01:52 +02:00
|
|
|
* @res: struct __trie_find_res
|
|
|
|
* @dtor: value destructor
|
2014-09-23 21:15:25 +02:00
|
|
|
*/
|
2014-09-28 22:01:52 +02:00
|
|
|
static void __trie_remove_leaf(struct __trie_find_res *res,
|
|
|
|
void (*dtor)(void *))
|
2014-09-23 21:15:25 +02:00
|
|
|
{
|
|
|
|
struct trie *node = res->node, *parent = res->parent;
|
|
|
|
int i, j, k;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Unlink the node which means
|
|
|
|
* a) delete it from its parent's mask and
|
|
|
|
* b) delete it from its parent's children array
|
|
|
|
*
|
|
|
|
* Then we can simply destroy it.
|
|
|
|
*/
|
|
|
|
|
|
|
|
i = __key_index(*node->key);
|
|
|
|
j = __key_to_array_index(parent, *node->key);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* b) -- yes, we do b) before a) to not need to undo the mask
|
|
|
|
* changes if an allocation fails :-) and the code is different for
|
|
|
|
* each of the cases below, anyway:
|
|
|
|
* i) if the parent will have no children left, we don't bother
|
|
|
|
* doing reallocations and stuff.
|
|
|
|
* ii) if the value-less non-root parent will only have one child
|
|
|
|
* left, merge these two nodes to save space.
|
|
|
|
* iii) else, reallocate the children array normally.
|
|
|
|
*
|
|
|
|
* In i), it is impossible that the parent would have no value
|
|
|
|
* because of ii) in a former removal (just saying that every leaf
|
|
|
|
* node (except the root if it becomes a leaf) is guaranteed to have
|
|
|
|
* a value).
|
|
|
|
*/
|
|
|
|
if (parent->nchildren == 1) { /* i) */
|
2014-09-28 22:01:52 +02:00
|
|
|
GB.Free((void **) &parent->children);
|
2014-09-23 21:15:25 +02:00
|
|
|
parent->children = NULL;
|
|
|
|
parent->nchildren = 0;
|
|
|
|
/* a) */
|
|
|
|
clear_bit(parent, i);
|
|
|
|
parent->nchildren--;
|
|
|
|
/* !parent->len is equivalent to parent == trie_root */
|
|
|
|
} else if (parent->nchildren == 2 && !parent->value
|
|
|
|
&& !parent->len) { /* ii) */
|
|
|
|
struct trie *other;
|
|
|
|
|
|
|
|
if (parent->children[0] == node)
|
|
|
|
other = parent->children[1];
|
|
|
|
else
|
|
|
|
other = parent->children[0];
|
2014-09-28 22:01:52 +02:00
|
|
|
GB.Realloc((void **) &parent, sizeof(*parent) + parent->len
|
|
|
|
+ other->len);
|
2014-09-23 21:15:25 +02:00
|
|
|
memcpy(parent->key + parent->len, other->key, other->len);
|
|
|
|
parent->len += other->len;
|
|
|
|
/* does a) */
|
|
|
|
memcpy(parent->mask, other->mask, sizeof(parent->mask));
|
2014-09-28 22:01:52 +02:00
|
|
|
GB.Free((void **) &parent->children);
|
2014-09-23 21:15:25 +02:00
|
|
|
parent->children = other->children;
|
|
|
|
parent->nchildren = other->nchildren;
|
|
|
|
parent->value = other->value;
|
|
|
|
/* Do NOT destroy_node() as we copied its ->children! */
|
2014-09-28 22:01:52 +02:00
|
|
|
GB.Free((void **) &other);
|
2014-09-23 21:15:25 +02:00
|
|
|
} else { /* iii) */
|
|
|
|
/* does a) */
|
|
|
|
for (k = j + 1; k < parent->nchildren; k++)
|
|
|
|
parent->children[k - 1] = parent->children[k];
|
|
|
|
parent->nchildren--;
|
2014-09-28 22:01:52 +02:00
|
|
|
GB.Realloc((void **) &parent->children, parent->nchildren *
|
|
|
|
sizeof(*parent->children));
|
2014-09-23 21:15:25 +02:00
|
|
|
clear_bit(parent, i);
|
|
|
|
}
|
|
|
|
|
2014-09-28 22:01:52 +02:00
|
|
|
destroy_node(node, dtor);
|
2014-09-23 21:15:25 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* __trie_remove_interior() - Remove an interior node
|
2014-09-28 22:01:52 +02:00
|
|
|
* @res: struct __trie_find_res
|
|
|
|
* @dtor: value destructor
|
2014-09-23 21:15:25 +02:00
|
|
|
*/
|
2014-09-28 22:01:52 +02:00
|
|
|
static void __trie_remove_interior(struct __trie_find_res *res,
|
|
|
|
void (*dtor)(void *))
|
2014-09-23 21:15:25 +02:00
|
|
|
{
|
|
|
|
/*
|
|
|
|
* Let's see: an interior node can only have 2 or more children, so
|
|
|
|
* we cannot possibly do any compression of the nodes. We just erase
|
|
|
|
* the value and leave the trie structure as-is.
|
|
|
|
*/
|
2014-09-28 22:01:52 +02:00
|
|
|
dtor(res->node->value);
|
2014-09-23 21:15:25 +02:00
|
|
|
res->node->value = NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* trie_remove() - Remove a key from the trie
|
|
|
|
* @trie: struct trie
|
|
|
|
* @key: the key
|
|
|
|
* @len: length
|
|
|
|
* @dtor: value destructor
|
|
|
|
*/
|
2014-09-28 22:01:52 +02:00
|
|
|
void trie_remove(struct trie *trie, const char *key, size_t len,
|
|
|
|
void (*dtor)(void *))
|
2014-09-23 21:15:25 +02:00
|
|
|
{
|
|
|
|
struct __trie_find_res res;
|
|
|
|
struct trie *node;
|
|
|
|
|
|
|
|
res = __trie_find(trie, key, len);
|
|
|
|
node = res.node;
|
|
|
|
/*
|
|
|
|
* We only want to work with valued, exactly-matching non-roots.
|
|
|
|
* Delete a value from the root anyways.
|
|
|
|
*/
|
|
|
|
if (!node || !__is_exact(&res, len) || !node->value)
|
2014-09-28 22:01:52 +02:00
|
|
|
return;
|
|
|
|
if (node == trie) {
|
|
|
|
dtor(node->value);
|
|
|
|
node->value = NULL;
|
|
|
|
return;
|
|
|
|
}
|
2014-09-23 21:15:25 +02:00
|
|
|
|
|
|
|
if (!node->children)
|
2014-09-28 22:01:52 +02:00
|
|
|
__trie_remove_leaf(&res, dtor);
|
|
|
|
else
|
|
|
|
__trie_remove_interior(&res, dtor);
|
2014-09-23 21:15:25 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* trie_find() - Get a trie node from its key
|
|
|
|
* @trie: struct trie
|
|
|
|
* @key: the key
|
|
|
|
* @len: length
|
|
|
|
*
|
|
|
|
* Returns NULL if the key was not found. The empty string maps to the root
|
|
|
|
* node of the trie.
|
|
|
|
*/
|
|
|
|
struct trie *trie_find(const struct trie *trie, const char *key, size_t len)
|
|
|
|
{
|
|
|
|
return __trie_find_exact(trie, key, len);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* trie_value() - Get value corresponding to a key
|
|
|
|
* @trie: struct trie
|
|
|
|
* @key: the key
|
|
|
|
* @len: length
|
|
|
|
*
|
|
|
|
* Return NULL if the key was not found. The empty string maps to the root
|
|
|
|
* node of the trie.
|
|
|
|
*/
|
|
|
|
void *trie_value(const struct trie *trie, const char *key, size_t len)
|
|
|
|
{
|
|
|
|
struct trie *node = trie_find(trie, key, len);
|
|
|
|
|
|
|
|
return node ? node->value : NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* trie_constrain() - Constrain the trie paths
|
|
|
|
* @trie: struct trie
|
|
|
|
* @p: struct trie_prefix
|
|
|
|
* @c: character
|
|
|
|
*
|
|
|
|
* To `constrain' a trie means to limit keys to a given prefix. If you have
|
|
|
|
* a trie consisting of keys "test", "tesla" and "term" and you constrain it
|
|
|
|
* with the prefix "tes", only "test" and "tesla" will be reachable.
|
|
|
|
*
|
|
|
|
* The constraint is saved in the struct trie_prefix. Contraining a trie
|
|
|
|
* does not alter its structure so that you can constrain the same trie
|
|
|
|
* multiple times simultaneously.
|
|
|
|
*
|
|
|
|
* By calling this function multiple times, you can refine the prefix in
|
|
|
|
* `p'. To begin without a constraint, use a prefix filled by
|
|
|
|
* trie_reset_prefix().
|
|
|
|
*
|
|
|
|
* If the prefix is not found, `p' is reset.
|
|
|
|
*
|
|
|
|
* WARNING
|
|
|
|
* Using any of the prefix-aware functions implies that the trie did not
|
|
|
|
* change between calls. If it did, the prefix may be invalid and the
|
|
|
|
* program may crash in consequence.
|
|
|
|
*/
|
|
|
|
void trie_constrain(const struct trie *trie, struct trie_prefix *p, char c)
|
|
|
|
{
|
|
|
|
struct trie *node;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
node = p->node ? : (struct trie *) trie;
|
|
|
|
i = p->i;
|
|
|
|
|
|
|
|
if (i == node->len) {
|
|
|
|
node = get_continuation(node, c);
|
|
|
|
if (!node)
|
|
|
|
goto reset;
|
|
|
|
p->node = node;
|
|
|
|
/* node->len is guaranteed to be positive here */
|
|
|
|
p->i = 1;
|
|
|
|
} else {
|
|
|
|
if (node->key[i] != c)
|
|
|
|
goto reset;
|
|
|
|
p->i = ++i;
|
|
|
|
}
|
|
|
|
/*
|
|
|
|
* As a `logical' node counts only what has a value. If you insert
|
|
|
|
* "soup" and "sour", the resulting "sou" node would NOT exist
|
|
|
|
* logically.
|
|
|
|
*
|
|
|
|
* Additionally, the match must be exact to count as TRIE_EXACT.
|
|
|
|
*/
|
|
|
|
if (p->i == node->len && node->value)
|
|
|
|
p->state = TRIE_EXACT;
|
|
|
|
else
|
|
|
|
p->state = TRIE_EXIST;
|
|
|
|
return;
|
|
|
|
|
|
|
|
reset:
|
|
|
|
trie_reset_prefix(p);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* trie_constrain2() - Constrain the trie multiple times
|
|
|
|
* @trie: struct trie
|
|
|
|
* @p: struct trie_prefix
|
|
|
|
* @str: string
|
|
|
|
* @len: length
|
|
|
|
*
|
|
|
|
* This function calls trie_constrain() in a loop - but with the
|
|
|
|
* difference that as soon as the prefix is not found, the function
|
|
|
|
* returns. In effect, the `str' is taken of consecutive constraints
|
|
|
|
* which should *ALL* be applied in row or none of them.
|
|
|
|
*/
|
|
|
|
void trie_constrain2(const struct trie *trie, struct trie_prefix *p,
|
|
|
|
const char *str, size_t len)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
|
|
|
if (!len) {
|
|
|
|
p->state = trie->value ? TRIE_EXACT : TRIE_EXIST;
|
|
|
|
p->node = (struct trie *) trie;
|
|
|
|
p->i = 0;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
for (i = 0; i < len; i++) {
|
|
|
|
trie_constrain(trie, p, str[i]);
|
|
|
|
if (p->state == TRIE_UNSET)
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* trie_find2() - Find a key from a trie constraint
|
|
|
|
* @trie: struct trie
|
|
|
|
* @p: struct trie_prefix
|
|
|
|
* @key: the key relative (!) to the prefix `p'
|
|
|
|
* @len: length
|
|
|
|
*
|
|
|
|
* This function is similar to trie_find(), except that keys are relative to
|
|
|
|
* the constraint in `p'.
|
|
|
|
*/
|
|
|
|
struct trie *trie_find2(const struct trie *trie,
|
|
|
|
const struct trie_prefix *p,
|
|
|
|
const char *key, size_t len)
|
|
|
|
{
|
|
|
|
struct trie *node;
|
|
|
|
int i, j;
|
|
|
|
|
|
|
|
node = p->node ? : (struct trie *) trie;
|
|
|
|
i = p->i;
|
|
|
|
/*
|
|
|
|
* First consume the rest of the prefix node. If none of the trivial
|
|
|
|
* cases occured, we can then use the normal traversal algorithm.
|
|
|
|
*/
|
|
|
|
for (j = 0; i < node->len && j < len; i++, j++)
|
|
|
|
if (node->key[i] != key[j])
|
|
|
|
return NULL;
|
|
|
|
if (j == len)
|
|
|
|
return node;
|
|
|
|
node = get_continuation(node, key[j]);
|
|
|
|
if (!node)
|
|
|
|
return NULL;
|
|
|
|
return __trie_find_exact(node, key, len);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* trie_value2() - Analogon to trie_value() using trie_find2()
|
|
|
|
* @trie: struct trie
|
|
|
|
* @p: struct trie_prefix
|
|
|
|
* @key: the key
|
|
|
|
* @len: length
|
|
|
|
*/
|
|
|
|
void *trie_value2(const struct trie *trie, const struct trie_prefix *p,
|
|
|
|
const char *key, size_t len)
|
|
|
|
{
|
|
|
|
struct trie *node = trie_find2(trie, p, key, len);
|
|
|
|
|
|
|
|
return node ? node->value : NULL;
|
|
|
|
}
|