631 lines
14 KiB
C
631 lines
14 KiB
C
/*
|
|
* This file is part of ltrace.
|
|
* Copyright (C) 2012, 2013 Petr Machata, Red Hat Inc.
|
|
*
|
|
* This program is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU General Public License as
|
|
* published by the Free Software Foundation; either version 2 of the
|
|
* License, or (at your option) any later version.
|
|
*
|
|
* This program is distributed in the hope that it will be useful, but
|
|
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
* General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with this program; if not, write to the Free Software
|
|
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
|
* 02110-1301 USA
|
|
*/
|
|
|
|
#include <string.h>
|
|
#include <stdlib.h>
|
|
#include <stdio.h>
|
|
#include "dict.h"
|
|
|
|
struct status_bits {
|
|
unsigned char taken : 1;
|
|
unsigned char erased : 1;
|
|
};
|
|
|
|
static struct status_bits *
|
|
bitp(struct dict *dict, size_t n)
|
|
{
|
|
return VECT_ELEMENT(&dict->status, struct status_bits, n);
|
|
}
|
|
|
|
void
|
|
dict_init(struct dict *dict,
|
|
size_t key_size, size_t value_size,
|
|
size_t (*hash1)(const void *),
|
|
int (*eq)(const void *, const void *),
|
|
size_t (*hash2)(size_t))
|
|
{
|
|
assert(hash1 != NULL);
|
|
assert(eq != NULL);
|
|
|
|
vect_init(&dict->keys, key_size);
|
|
vect_init(&dict->values, value_size);
|
|
VECT_INIT(&dict->status, struct status_bits);
|
|
dict->size = 0;
|
|
|
|
dict->hash1 = hash1;
|
|
dict->hash2 = hash2;
|
|
dict->eq = eq;
|
|
}
|
|
|
|
struct clone_data {
|
|
struct dict *target;
|
|
int (*clone_key)(void *tgt, const void *src, void *data);
|
|
int (*clone_value)(void *tgt, const void *src, void *data);
|
|
void (*dtor_key)(void *tgt, void *data);
|
|
void (*dtor_value)(void *tgt, void *data);
|
|
void *data;
|
|
};
|
|
|
|
static enum callback_status
|
|
clone_cb(void *key, void *value, void *data)
|
|
{
|
|
struct clone_data *clone_data = data;
|
|
|
|
char nkey[clone_data->target->keys.elt_size];
|
|
if (clone_data->clone_key == NULL)
|
|
memmove(nkey, key, sizeof(nkey));
|
|
else if (clone_data->clone_key(&nkey, key, clone_data->data) < 0)
|
|
return CBS_STOP;
|
|
|
|
char nvalue[clone_data->target->values.elt_size];
|
|
if (clone_data->clone_value == NULL) {
|
|
memmove(nvalue, value, sizeof(nvalue));
|
|
} else if (clone_data->clone_value(&nvalue, value,
|
|
clone_data->data) < 0) {
|
|
fail:
|
|
if (clone_data->clone_key != NULL)
|
|
clone_data->dtor_key(&nkey, clone_data->data);
|
|
return CBS_STOP;
|
|
}
|
|
|
|
if (dict_insert(clone_data->target, nkey, nvalue) < 0) {
|
|
if (clone_data->clone_value != NULL)
|
|
clone_data->dtor_value(&nvalue, clone_data->data);
|
|
goto fail;
|
|
}
|
|
|
|
return CBS_CONT;
|
|
}
|
|
|
|
int
|
|
dict_clone(struct dict *target, const struct dict *source,
|
|
int (*clone_key)(void *tgt, const void *src, void *data),
|
|
void (*dtor_key)(void *tgt, void *data),
|
|
int (*clone_value)(void *tgt, const void *src, void *data),
|
|
void (*dtor_value)(void *tgt, void *data),
|
|
void *data)
|
|
{
|
|
assert((clone_key != NULL) == (dtor_key != NULL));
|
|
assert((clone_value != NULL) == (dtor_value != NULL));
|
|
|
|
dict_init(target, source->keys.elt_size, source->values.elt_size,
|
|
source->hash1, source->eq, source->hash2);
|
|
struct clone_data clone_data = {
|
|
target, clone_key, clone_value, dtor_key, dtor_value, data
|
|
};
|
|
if (dict_each((struct dict *)source, NULL,
|
|
clone_cb, &clone_data) != NULL) {
|
|
dict_destroy(target, dtor_key, dtor_value, data);
|
|
return -1;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
size_t
|
|
dict_size(const struct dict *dict)
|
|
{
|
|
return dict->size;
|
|
}
|
|
|
|
int
|
|
dict_empty(const struct dict *dict)
|
|
{
|
|
return dict->size == 0;
|
|
}
|
|
|
|
struct destroy_data {
|
|
void (*dtor_key)(void *tgt, void *data);
|
|
void (*dtor_value)(void *tgt, void *data);
|
|
void *data;
|
|
};
|
|
|
|
static enum callback_status
|
|
destroy_cb(void *key, void *value, void *data)
|
|
{
|
|
struct destroy_data *destroy_data = data;
|
|
if (destroy_data->dtor_key)
|
|
destroy_data->dtor_key(key, destroy_data->data);
|
|
if (destroy_data->dtor_value)
|
|
destroy_data->dtor_value(value, destroy_data->data);
|
|
return CBS_CONT;
|
|
}
|
|
|
|
void
|
|
dict_destroy(struct dict *dict,
|
|
void (*dtor_key)(void *tgt, void *data),
|
|
void (*dtor_value)(void *tgt, void *data),
|
|
void *data)
|
|
{
|
|
/* Some slots are unused (the corresponding keys and values
|
|
* are uninitialized), so we can't call dtors for them.
|
|
* Iterate DICT instead. */
|
|
if (dtor_key != NULL || dtor_value != NULL) {
|
|
struct destroy_data destroy_data = {
|
|
dtor_key, dtor_value, data
|
|
};
|
|
dict_each(dict, NULL, destroy_cb, &destroy_data);
|
|
}
|
|
|
|
vect_destroy(&dict->keys, NULL, NULL);
|
|
vect_destroy(&dict->values, NULL, NULL);
|
|
vect_destroy(&dict->status, NULL, NULL);
|
|
}
|
|
|
|
static size_t
|
|
default_secondary_hash(size_t pos)
|
|
{
|
|
return pos % 97 + 1;
|
|
}
|
|
|
|
static size_t
|
|
small_secondary_hash(size_t pos)
|
|
{
|
|
return 1;
|
|
}
|
|
|
|
static inline size_t
|
|
n(struct dict *dict)
|
|
{
|
|
return vect_size(&dict->keys);
|
|
}
|
|
|
|
static inline size_t (*
|
|
hash2(struct dict *dict))(size_t)
|
|
{
|
|
if (dict->hash2 != NULL)
|
|
return dict->hash2;
|
|
else if (n(dict) < 100)
|
|
return small_secondary_hash;
|
|
else
|
|
return default_secondary_hash;
|
|
}
|
|
|
|
static void *
|
|
getkey(struct dict *dict, size_t pos)
|
|
{
|
|
return ((unsigned char *)dict->keys.data)
|
|
+ dict->keys.elt_size * pos;
|
|
}
|
|
|
|
static void *
|
|
getvalue(struct dict *dict, size_t pos)
|
|
{
|
|
return ((unsigned char *)dict->values.data)
|
|
+ dict->values.elt_size * pos;
|
|
}
|
|
|
|
static size_t
|
|
find_slot(struct dict *dict, const void *key,
|
|
int *foundp, int *should_rehash, size_t *pi)
|
|
{
|
|
assert(n(dict) > 0);
|
|
size_t pos = dict->hash1(key) % n(dict);
|
|
size_t pos0 = -1;
|
|
size_t d = hash2(dict)(pos);
|
|
size_t i = 0;
|
|
*foundp = 0;
|
|
|
|
/* We skip over any taken or erased slots. But we remember
|
|
* the first erased that we find, and if we don't find the key
|
|
* later, we return that position. */
|
|
for (; bitp(dict, pos)->taken || bitp(dict, pos)->erased;
|
|
pos = (pos + d) % n(dict)) {
|
|
if (pos0 == (size_t)-1 && bitp(dict, pos)->erased)
|
|
pos0 = pos;
|
|
|
|
/* If there is a loop, but we've seen an erased
|
|
* element, take that one. Otherwise give up. */
|
|
if (++i > dict->size) {
|
|
if (pos0 != (size_t)-1)
|
|
break;
|
|
return (size_t)-1;
|
|
}
|
|
|
|
if (bitp(dict, pos)->taken
|
|
&& dict->eq(getkey(dict, pos), key)) {
|
|
*foundp = 1;
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (!*foundp && pos0 != (size_t)-1)
|
|
pos = pos0;
|
|
|
|
/* If the hash table degraded into a linked list, request a
|
|
* rehash. */
|
|
if (should_rehash != NULL)
|
|
*should_rehash = i > 10 && i > n(dict) / 10;
|
|
|
|
if (pi != NULL)
|
|
*pi = i;
|
|
return pos;
|
|
}
|
|
|
|
static enum callback_status
|
|
rehash_move(void *key, void *value, void *data)
|
|
{
|
|
if (dict_insert(data, key, value) < 0)
|
|
return CBS_STOP;
|
|
else
|
|
return CBS_CONT;
|
|
}
|
|
|
|
static int
|
|
rehash(struct dict *dict, size_t nn)
|
|
{
|
|
assert(nn != n(dict));
|
|
int ret = -1;
|
|
|
|
struct dict tmp;
|
|
dict_init(&tmp, dict->keys.elt_size, dict->values.elt_size,
|
|
dict->hash1, dict->eq, dict->hash2);
|
|
|
|
/* To honor all invariants (so that we can safely call
|
|
* dict_destroy), we first make a request to _reserve_ enough
|
|
* room in all vectors. This has no observable effect on
|
|
* contents of vectors. */
|
|
if (vect_reserve(&tmp.keys, nn) < 0
|
|
|| vect_reserve(&tmp.values, nn) < 0
|
|
|| vect_reserve(&tmp.status, nn) < 0)
|
|
goto done;
|
|
|
|
/* Now that we know that there is enough size in vectors, we
|
|
* simply bump the size. */
|
|
tmp.keys.size = nn;
|
|
tmp.values.size = nn;
|
|
size_t old_size = tmp.status.size;
|
|
tmp.status.size = nn;
|
|
memset(VECT_ELEMENT(&tmp.status, struct status_bits, old_size),
|
|
0, (tmp.status.size - old_size) * tmp.status.elt_size);
|
|
|
|
/* At this point, TMP is once more an empty dictionary with NN
|
|
* slots. Now move stuff from DICT to TMP. */
|
|
if (dict_each(dict, NULL, rehash_move, &tmp) != NULL)
|
|
goto done;
|
|
|
|
/* And now swap contents of DICT and TMP, and we are done. */
|
|
{
|
|
struct dict tmp2 = *dict;
|
|
*dict = tmp;
|
|
tmp = tmp2;
|
|
}
|
|
|
|
ret = 0;
|
|
|
|
done:
|
|
/* We only want to release the containers, not the actual data
|
|
* that they hold, so it's fine if we don't pass any dtor. */
|
|
dict_destroy(&tmp, NULL, NULL, NULL);
|
|
return ret;
|
|
|
|
}
|
|
|
|
static const size_t primes[] = {
|
|
13, 31, 61, 127, 251, 509, 1021, 2039, 4093,
|
|
8191, 16381, 32749, 65521, 130981, 0
|
|
};
|
|
|
|
static size_t
|
|
larger_size(size_t current)
|
|
{
|
|
if (current == 0)
|
|
return primes[0];
|
|
|
|
if (current < primes[sizeof(primes)/sizeof(*primes) - 2]) {
|
|
size_t i;
|
|
for (i = 0; primes[i] != 0; ++i)
|
|
if (primes[i] > current)
|
|
return primes[i];
|
|
abort();
|
|
}
|
|
|
|
/* We ran out of primes, so invent a new one. The following
|
|
* gives primes until about 17M elements (and then some more
|
|
* later). */
|
|
return 2 * current + 6585;
|
|
}
|
|
|
|
static size_t
|
|
smaller_size(size_t current)
|
|
{
|
|
if (current <= primes[0])
|
|
return primes[0];
|
|
|
|
if (current <= primes[sizeof(primes)/sizeof(*primes) - 2]) {
|
|
size_t i;
|
|
size_t prev = 0;
|
|
for (i = 0; primes[i] != 0; ++i) {
|
|
if (primes[i] >= current)
|
|
return prev;
|
|
prev = primes[i];
|
|
}
|
|
abort();
|
|
}
|
|
|
|
return (current - 6585) / 2;
|
|
}
|
|
|
|
int
|
|
dict_insert(struct dict *dict, void *key, void *value)
|
|
{
|
|
if (n(dict) == 0 || dict->size > 0.7 * n(dict))
|
|
rehash:
|
|
if (rehash(dict, larger_size(n(dict))) < 0)
|
|
return -1;
|
|
|
|
int found;
|
|
int should_rehash;
|
|
size_t slot_n = find_slot(dict, key, &found, &should_rehash, NULL);
|
|
if (slot_n == (size_t)-1)
|
|
return -1;
|
|
if (found)
|
|
return 1;
|
|
assert(!bitp(dict, slot_n)->taken);
|
|
|
|
/* If rehash was requested, do that, and retry. But just live
|
|
* with it for apparently sparse tables. No resizing can fix
|
|
* a rubbish hash. */
|
|
if (should_rehash && dict->size > 0.3 * n(dict))
|
|
goto rehash;
|
|
|
|
memmove(getkey(dict, slot_n), key, dict->keys.elt_size);
|
|
memmove(getvalue(dict, slot_n), value, dict->values.elt_size);
|
|
|
|
bitp(dict, slot_n)->taken = 1;
|
|
bitp(dict, slot_n)->erased = 0;
|
|
++dict->size;
|
|
|
|
return 0;
|
|
}
|
|
|
|
void *
|
|
dict_find(struct dict *dict, const void *key)
|
|
{
|
|
if (dict->size == 0)
|
|
return NULL;
|
|
assert(n(dict) > 0);
|
|
|
|
int found;
|
|
size_t slot_n = find_slot(dict, key, &found, NULL, NULL);
|
|
if (found)
|
|
return getvalue(dict, slot_n);
|
|
else
|
|
return NULL;
|
|
}
|
|
|
|
int
|
|
dict_erase(struct dict *dict, const void *key,
|
|
void (*dtor_key)(void *tgt, void *data),
|
|
void (*dtor_value)(void *tgt, void *data),
|
|
void *data)
|
|
{
|
|
int found;
|
|
size_t i;
|
|
size_t slot_n = find_slot(dict, key, &found, NULL, &i);
|
|
if (!found)
|
|
return -1;
|
|
|
|
if (dtor_key != NULL)
|
|
dtor_key(getkey(dict, slot_n), data);
|
|
if (dtor_value != NULL)
|
|
dtor_value(getvalue(dict, slot_n), data);
|
|
|
|
bitp(dict, slot_n)->taken = 0;
|
|
bitp(dict, slot_n)->erased = 1;
|
|
--dict->size;
|
|
|
|
if (dict->size < 0.3 * n(dict)) {
|
|
size_t smaller = smaller_size(n(dict));
|
|
if (smaller != n(dict))
|
|
/* Don't mind if it fails when shrinking. */
|
|
rehash(dict, smaller);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
void *
|
|
dict_each(struct dict *dict, void *start_after,
|
|
enum callback_status (*cb)(void *, void *, void *), void *data)
|
|
{
|
|
size_t i;
|
|
if (start_after != NULL)
|
|
i = ((start_after - dict->keys.data) / dict->keys.elt_size) + 1;
|
|
else
|
|
i = 0;
|
|
|
|
for (; i < dict->keys.size; ++i)
|
|
if (bitp(dict, i)->taken && !bitp(dict, i)->erased) {
|
|
void *key = getkey(dict, i);
|
|
if (cb(key, getvalue(dict, i), data) != CBS_CONT)
|
|
return key;
|
|
}
|
|
|
|
return NULL;
|
|
}
|
|
|
|
size_t
|
|
dict_hash_int(const int *key)
|
|
{
|
|
return (size_t)(*key * 2654435761U);
|
|
}
|
|
|
|
int
|
|
dict_eq_int(const int *key1, const int *key2)
|
|
{
|
|
return *key1 == *key2;
|
|
}
|
|
|
|
size_t
|
|
dict_hash_string(const char **key)
|
|
{
|
|
size_t h = 5381;
|
|
const char *str = *key;
|
|
while (*str != 0)
|
|
h = h * 33 ^ *str++;
|
|
return h;
|
|
}
|
|
|
|
int
|
|
dict_eq_string(const char **key1, const char **key2)
|
|
{
|
|
return strcmp(*key1, *key2) == 0;
|
|
}
|
|
|
|
void
|
|
dict_dtor_string(const char **key, void *data)
|
|
{
|
|
free((char *)*key);
|
|
}
|
|
|
|
int
|
|
dict_clone_string(const char **tgt, const char **src, void *data)
|
|
{
|
|
*tgt = strdup(*src);
|
|
return *tgt != NULL ? 0 : -1;
|
|
}
|
|
|
|
#ifdef TEST
|
|
static enum callback_status
|
|
dump(int *key, int *value, void *data)
|
|
{
|
|
char *seen = data;
|
|
assert(seen[*key] == 0);
|
|
seen[*key] = 1;
|
|
assert(*value == *key * 2 + 1);
|
|
return CBS_STOP;
|
|
}
|
|
|
|
static size_t
|
|
dict_hash_int_silly(const int *key)
|
|
{
|
|
return *key % 10;
|
|
}
|
|
|
|
static void
|
|
verify(struct dict *di, size_t len, char *seen)
|
|
{
|
|
size_t ct = 0;
|
|
int *it;
|
|
for (it = NULL; (it = DICT_EACH(di, int, int, it, dump, seen)) != NULL;)
|
|
ct++;
|
|
assert(ct == len);
|
|
memset(seen, 0, len);
|
|
}
|
|
|
|
static enum callback_status
|
|
fill_keys(int *key, int *value, void *data)
|
|
{
|
|
int *array = data;
|
|
array[++array[0]] = *key;
|
|
return CBS_CONT;
|
|
}
|
|
|
|
static void
|
|
test1(void)
|
|
{
|
|
struct dict di;
|
|
DICT_INIT(&di, int, int, dict_hash_int, dict_eq_int, NULL);
|
|
|
|
char seen[100000] = {};
|
|
size_t i;
|
|
for (i = 0; i < sizeof(seen); ++i) {
|
|
int key = i;
|
|
int value = 2 * i + 1;
|
|
DICT_INSERT(&di, &key, &value);
|
|
int *valp = DICT_FIND_REF(&di, &key, int);
|
|
assert(valp != NULL);
|
|
assert(*valp == value);
|
|
assert(dict_size(&di) == i + 1);
|
|
}
|
|
|
|
verify(&di, sizeof(seen), seen);
|
|
|
|
struct dict d2;
|
|
DICT_CLONE(&d2, &di, int, int, NULL, NULL, NULL, NULL, NULL);
|
|
DICT_DESTROY(&di, int, int, NULL, NULL, NULL);
|
|
verify(&d2, sizeof(seen), seen);
|
|
|
|
/* Now we try to gradually erase all elements. We can't erase
|
|
* inside a DICT_EACH call, so copy first keys to a separate
|
|
* memory area first. */
|
|
int keys[d2.size + 1];
|
|
size_t ct = 0;
|
|
keys[0] = 0;
|
|
DICT_EACH(&d2, int, int, NULL, fill_keys, keys);
|
|
for (i = 0; i < (size_t)keys[0]; ++i) {
|
|
assert(DICT_ERASE(&d2, &keys[i + 1], int,
|
|
NULL, NULL, NULL) == 0);
|
|
++ct;
|
|
}
|
|
assert(ct == sizeof(seen));
|
|
DICT_DESTROY(&d2, int, int, NULL, NULL, NULL);
|
|
}
|
|
|
|
static void
|
|
test_erase(void)
|
|
{
|
|
int i;
|
|
|
|
/* To test erase, we need a relatively bad hash function, so
|
|
* that there are some overlapping chains in the table. */
|
|
struct dict d2;
|
|
DICT_INIT(&d2, int, int, dict_hash_int_silly, dict_eq_int, NULL);
|
|
const int limit = 500;
|
|
for (i = 0; i < limit; ++i) {
|
|
int key = 2 * i + 1;
|
|
int value = 2 * key + 1;
|
|
DICT_INSERT(&d2, &key, &value);
|
|
}
|
|
|
|
/* Now we try to delete each of the keys, and verify that none
|
|
* of the chains was broken. */
|
|
for (i = 0; i < limit; ++i) {
|
|
struct dict copy;
|
|
DICT_CLONE(©, &d2, int, int, NULL, NULL, NULL, NULL, NULL);
|
|
int key = 2 * i + 1;
|
|
DICT_ERASE(©, &key, int, NULL, NULL, NULL);
|
|
assert(dict_size(©) == dict_size(&d2) - 1);
|
|
|
|
int j;
|
|
for (j = 0; j < limit; ++j) {
|
|
key = 2 * j + 1;
|
|
int *valp = DICT_FIND_REF(©, &key, int);
|
|
if (i != j) {
|
|
assert(valp != NULL);
|
|
assert(*valp == 2 * key + 1);
|
|
} else {
|
|
assert(valp == NULL);
|
|
}
|
|
}
|
|
|
|
DICT_DESTROY(©, int, int, NULL, NULL, NULL);
|
|
}
|
|
DICT_DESTROY(&d2, int, int, NULL, NULL, NULL);
|
|
}
|
|
|
|
int main(int argc, char *argv[])
|
|
{
|
|
test1();
|
|
test_erase();
|
|
return 0;
|
|
}
|
|
|
|
#endif
|