atom: rewrite as a hash table
While the previous 1987-style[0] scheme was fun (and I reasonably optimized it for a fair comparison), this task is more suited to a hash table. Even a simple implementation beats the old one.

[0] Seems to have first appeared in X11R1, released September 1987. See server/dix/atom.c here: https://www.x.org/releases/X11R1/X.V11R1.tar.gz

Signed-off-by: Ran Benita <ran@unusedvar.com>
master
parent
baf5522649
commit
16fe837d8d
128
src/atom.c
128
src/atom.c
|
@ -72,8 +72,14 @@
|
||||||
|
|
||||||
#include "config.h"
|
#include "config.h"
|
||||||
|
|
||||||
#include "utils.h"
|
#include <assert.h>
|
||||||
|
#include <inttypes.h>
|
||||||
|
#include <stdbool.h>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
#include "atom.h"
|
#include "atom.h"
|
||||||
|
#include "darray.h"
|
||||||
|
#include "utils.h"
|
||||||
|
|
||||||
/* FNV-1a (http://www.isthe.com/chongo/tech/comp/fnv/). */
|
/* FNV-1a (http://www.isthe.com/chongo/tech/comp/fnv/). */
|
||||||
static inline uint32_t
|
static inline uint32_t
|
||||||
|
@ -90,31 +96,14 @@ hash_buf(const char *string, size_t len)
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The atom table is a insert-only unbalanced binary search tree
|
* The atom table is an insert-only linear probing hash table
|
||||||
* mapping strings to atoms.
|
* mapping strings to atoms. Another array maps the atoms to
|
||||||
*
|
* strings. The atom value is the position in the strings array.
|
||||||
* The tree nodes are kept contiguously in the `table` array.
|
|
||||||
*
|
|
||||||
* The atom value is the index of the tree node in the array.
|
|
||||||
*
|
|
||||||
* As an optimization, strings are not compared by value directly,
|
|
||||||
* s1 < s2
|
|
||||||
* instead, they are compared by fingerprint (hash) and the value
|
|
||||||
* is only used to resolve collisions:
|
|
||||||
* (fingerprint(s1), s1) < (fingerprint(s2), s2)
|
|
||||||
* Fingerprint are pre-calculated and saved in the tree nodes.
|
|
||||||
*
|
|
||||||
* Why is this not just a hash table? Who knows!
|
|
||||||
*/
|
*/
|
||||||
struct atom_node {
|
|
||||||
xkb_atom_t left, right;
|
|
||||||
uint32_t fingerprint;
|
|
||||||
char *string;
|
|
||||||
};
|
|
||||||
|
|
||||||
struct atom_table {
|
struct atom_table {
|
||||||
xkb_atom_t root;
|
xkb_atom_t *index;
|
||||||
darray(struct atom_node) table;
|
size_t index_size;
|
||||||
|
darray(char *) strings;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct atom_table *
|
struct atom_table *
|
||||||
|
@ -124,9 +113,10 @@ atom_table_new(void)
|
||||||
if (!table)
|
if (!table)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
darray_init(table->table);
|
darray_init(table->strings);
|
||||||
/* The original throw-away root is here, at the illegal atom 0. */
|
darray_append(table->strings, NULL);
|
||||||
darray_resize0(table->table, 1);
|
table->index_size = 4;
|
||||||
|
table->index = calloc(table->index_size, sizeof(*table->index));
|
||||||
|
|
||||||
return table;
|
return table;
|
||||||
}
|
}
|
||||||
|
@ -137,61 +127,67 @@ atom_table_free(struct atom_table *table)
|
||||||
if (!table)
|
if (!table)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
struct atom_node *node;
|
char **string;
|
||||||
darray_foreach(node, table->table)
|
darray_foreach(string, table->strings)
|
||||||
free(node->string);
|
free(*string);
|
||||||
darray_free(table->table);
|
darray_free(table->strings);
|
||||||
|
free(table->index);
|
||||||
free(table);
|
free(table);
|
||||||
}
|
}
|
||||||
|
|
||||||
const char *
|
const char *
|
||||||
atom_text(struct atom_table *table, xkb_atom_t atom)
|
atom_text(struct atom_table *table, xkb_atom_t atom)
|
||||||
{
|
{
|
||||||
assert(atom < darray_size(table->table));
|
assert(atom < darray_size(table->strings));
|
||||||
return darray_item(table->table, atom).string;
|
return darray_item(table->strings, atom);
|
||||||
}
|
}
|
||||||
|
|
||||||
xkb_atom_t
|
xkb_atom_t
|
||||||
atom_intern(struct atom_table *table, const char *string, size_t len, bool add)
|
atom_intern(struct atom_table *table, const char *string, size_t len, bool add)
|
||||||
{
|
{
|
||||||
uint32_t fingerprint = hash_buf(string, len);
|
if (darray_size(table->strings) > 0.80 * table->index_size) {
|
||||||
|
table->index_size *= 2;
|
||||||
|
table->index = realloc(table->index, table->index_size * sizeof(*table->index));
|
||||||
|
memset(table->index, 0, table->index_size * sizeof(*table->index));
|
||||||
|
for (size_t j = 1; j < darray_size(table->strings); j++) {
|
||||||
|
const char *s = darray_item(table->strings, j);
|
||||||
|
uint32_t hash = hash_buf(s, strlen(s));
|
||||||
|
for (size_t i = 0; i < table->index_size; i++) {
|
||||||
|
size_t index_pos = (hash + i) & (table->index_size - 1);
|
||||||
|
if (index_pos == 0)
|
||||||
|
continue;
|
||||||
|
|
||||||
xkb_atom_t *atomp = &table->root;
|
xkb_atom_t atom = table->index[index_pos];
|
||||||
while (*atomp != XKB_ATOM_NONE) {
|
if (atom == XKB_ATOM_NONE) {
|
||||||
struct atom_node *node = &darray_item(table->table, *atomp);
|
table->index[index_pos] = j;
|
||||||
|
break;
|
||||||
if (fingerprint > node->fingerprint) {
|
|
||||||
atomp = &node->right;
|
|
||||||
}
|
}
|
||||||
else if (fingerprint < node->fingerprint) {
|
|
||||||
atomp = &node->left;
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
/* Now start testing the strings. */
|
|
||||||
const int cmp = strncmp(string, node->string, len);
|
|
||||||
if (likely(cmp == 0 && node->string[len] == '\0')) {
|
|
||||||
return *atomp;
|
|
||||||
}
|
|
||||||
else if (cmp > 0) {
|
|
||||||
atomp = &node->right;
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
atomp = &node->left;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!add)
|
uint32_t hash = hash_buf(string, len);
|
||||||
|
for (size_t i = 0; i < table->index_size; i++) {
|
||||||
|
size_t index_pos = (hash + i) & (table->index_size - 1);
|
||||||
|
if (index_pos == 0)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
xkb_atom_t existing_atom = table->index[index_pos];
|
||||||
|
if (existing_atom == XKB_ATOM_NONE) {
|
||||||
|
if (add) {
|
||||||
|
xkb_atom_t new_atom = darray_size(table->strings);
|
||||||
|
darray_append(table->strings, strndup(string, len));
|
||||||
|
table->index[index_pos] = new_atom;
|
||||||
|
return new_atom;
|
||||||
|
} else {
|
||||||
return XKB_ATOM_NONE;
|
return XKB_ATOM_NONE;
|
||||||
|
}
|
||||||
struct atom_node node;
|
}
|
||||||
node.string = strndup(string, len);
|
|
||||||
assert(node.string != NULL);
|
const char *existing_value = darray_item(table->strings, existing_atom);
|
||||||
node.left = node.right = XKB_ATOM_NONE;
|
if (strncmp(existing_value, string, len) == 0 && existing_value[len] == '\0')
|
||||||
node.fingerprint = fingerprint;
|
return existing_atom;
|
||||||
xkb_atom_t atom = darray_size(table->table);
|
}
|
||||||
/* Do this before the append, as it may realloc and change the offsets. */
|
|
||||||
*atomp = atom;
|
assert(!"couldn't find an empty slot during probing");
|
||||||
darray_append(table->table, node);
|
|
||||||
return atom;
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue