Branch data Line data Source code
1 : : /*
2 : : * Mixed hash table / binary tree code.
3 : : * (c) Thomas Pornin 2002
4 : : *
5 : : * Redistribution and use in source and binary forms, with or without
6 : : * modification, are permitted provided that the following conditions
7 : : * are met:
8 : : * 1. Redistributions of source code must retain the above copyright
9 : : * notice, this list of conditions and the following disclaimer.
10 : : * 2. Redistributions in binary form must reproduce the above copyright
11 : : * notice, this list of conditions and the following disclaimer in the
12 : : * documentation and/or other materials provided with the distribution.
13 : : * 4. The name of the authors may not be used to endorse or promote
14 : : * products derived from this software without specific prior written
15 : : * permission.
16 : : *
17 : : * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
18 : : * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19 : : * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 : : * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
21 : : * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 : : * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
23 : : * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
24 : : * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
25 : : * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
26 : : * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
27 : : * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 : : *
29 : : */
30 : :
31 : : #include <stddef.h>
32 : : #include <string.h>
33 : : #include <limits.h>
34 : : #include "nhash.h"
35 : : #include "mem.h"
36 : :
37 : : /*
38 : : * Hash a string into an `unsigned' value. This function is derived
39 : : * from the hash function used in the ELF binary object file format
40 : : * hash tables. The result size is a 32-bit number if the `unsigned'
41 : : * type is big enough to hold 32-bit arbitrary numbers, a 16-bit number
42 : : * otherwise.
43 : : */
44 : 3135904 : static unsigned hash_string(char *name)
45 : : {
46 : 3135904 : unsigned h = 0;
47 : :
48 [ + + ]: 54983800 : for (h = 0; *name; name ++) {
49 : : unsigned g;
50 : :
51 : 51847896 : h = (h << 4) + *(unsigned char *)name;
52 : : #if UINT_MAX >= 0xffffffffU
53 : 51847896 : g = h & 0xF0000000U;
54 : 51847896 : h ^= (g >> 24);
55 : : #else
56 : : g = h & 0xF000U;
57 : : h ^= (g >> 12);
58 : : #endif
59 : 51847896 : h &= ~g;
60 : : }
61 : 3135904 : return h;
62 : : }
63 : :
64 : : /*
65 : : * Each item in the table is a structure beginning with a `hash_item_header'
66 : : * structure. Those headers define binary trees such that all left-descendants
67 : : * (respectively right-descendants) of a given tree node have an associated
68 : : * hash value strictly smaller (respectively greater) than the hash value
69 : : * associated with this node.
70 : : *
71 : : * The `ident' field points to an array of char. The `sizeof(unsigned)'
72 : : * first `char' contain a copy of an `unsigned' value which is the hashed
73 : : * string, except the least significant bit. When this bit is set to 0,
74 : : * the node contains the unique item using that hash value. If the bit
75 : : * is set to 1, then there are several items with that hash value.
76 : : *
77 : : * When several items share the same hash value, they are linked together
78 : : * in a linked list by their `left' field. The node contains no data;
79 : : * it is a "fake item".
80 : : *
81 : : * The `char' following the hash value encode the item name for true items.
82 : : * For fake items, they contain the pointer to the first true item of the
83 : : * corresponding link list (suitably aligned).
84 : : *
85 : : * There are HTT_NUM_TREES trees; the items are sorted among trees by the
86 : : * lest significant bits of their hash value.
87 : : */
88 : :
89 : 23912 : static void internal_init(HTT *htt, void (*deldata)(void *), int reduced)
90 : : {
91 : 23912 : htt->deldata = deldata;
92 [ - + ]: 23912 : if (reduced) {
93 : 0 : HTT2 *htt2 = (HTT2 *)htt;
94 : :
95 : 0 : htt2->tree[0] = htt2->tree[1] = NULL;
96 : : } else {
97 : : unsigned u;
98 : :
99 [ + + ]: 3084648 : for (u = 0; u < HTT_NUM_TREES; u ++) htt->tree[u] = NULL;
100 : : }
101 : 23912 : }
102 : :
103 : : /* see nhash.h */
104 : 23912 : void HTT_init(HTT *htt, void (*deldata)(void *))
105 : : {
106 : 23912 : internal_init(htt, deldata, 0);
107 : 23912 : }
108 : :
109 : : /* see nhash.h */
110 : 0 : void HTT2_init(HTT2 *htt, void (*deldata)(void *))
111 : : {
112 : 0 : internal_init((HTT *)htt, deldata, 1);
113 : 0 : }
114 : :
115 : : #define PTR_SHIFT (sizeof(hash_item_header *) * \
116 : : ((sizeof(unsigned) + sizeof(hash_item_header *) - 1) / \
117 : : sizeof(hash_item_header *)))
118 : :
119 : : #define TREE(u) (*(reduced ? ((HTT2 *)htt)->tree + ((u) & 1) \
120 : : : htt->tree + ((u) & (HTT_NUM_TREES - 1))))
121 : :
122 : : /*
123 : : * Find a node for the given hash value. If `father' is not NULL, fill
124 : : * `*father' with a pointer to the node's father.
125 : : * If the return value is NULL, then no existing node was found; if `*father'
126 : : * is also NULL, the tree is empty. If the return value is not NULL but
127 : : * `*father' is NULL, then the found node is the tree root.
128 : : *
129 : : * If `father' is not NULL, then `*leftson' is filled with 1 if the node
130 : : * was looked for as the father left son, 0 otherwise.
131 : : */
132 : 3135904 : static hash_item_header *find_node(HTT *htt, unsigned u,
133 : : hash_item_header **father, int *leftson, int reduced)
134 : : {
135 [ - + ]: 3135904 : hash_item_header *node = TREE(u);
136 : 3135904 : hash_item_header *nodef = NULL;
137 : : int ls;
138 : :
139 : 3135904 : u &= ~1U;
140 [ + + ]: 4235286 : while (node != NULL) {
141 : 1198709 : unsigned v = *(unsigned *)(node->ident);
142 : 1198709 : unsigned w = v & ~1U;
143 : :
144 [ + + ]: 1198709 : if (u == w) break;
145 : 1099382 : nodef = node;
146 [ + + ]: 1099382 : if (u < w) {
147 : 597154 : node = node->left;
148 : 597154 : ls = 1;
149 : : } else {
150 : 502228 : node = node->right;
151 : 502228 : ls = 0;
152 : : }
153 : : }
154 [ + + ]: 3135904 : if (father != NULL) {
155 : 253892 : *father = nodef;
156 : 253892 : *leftson = ls;
157 : : }
158 : 3135904 : return node;
159 : : }
160 : :
161 : 2882012 : static void *internal_get(HTT *htt, char *name, int reduced)
162 : : {
163 : 2882012 : unsigned u = hash_string(name), v;
164 : 2882012 : hash_item_header *node = find_node(htt, u, NULL, NULL, reduced);
165 : :
166 [ + + ]: 2882012 : if (node == NULL) return NULL;
167 : 99327 : v = *(unsigned *)(node->ident);
168 [ + - ]: 99327 : if ((v & 1U) == 0) {
169 [ + - ]: 99327 : return (strcmp(HASH_ITEM_NAME(node), name) == 0) ? node : NULL;
170 : : }
171 : 0 : node = *(hash_item_header **)(node->ident + PTR_SHIFT);
172 [ # # ]: 0 : while (node != NULL) {
173 [ # # ]: 0 : if (strcmp(HASH_ITEM_NAME(node), name) == 0) return node;
174 : 0 : node = node->left;
175 : : }
176 : 2882012 : return NULL;
177 : : }
178 : :
179 : : /* see nhash.h */
180 : 2882012 : void *HTT_get(HTT *htt, char *name)
181 : : {
182 : 2882012 : return internal_get(htt, name, 0);
183 : : }
184 : :
185 : : /* see nhash.h */
186 : 0 : void *HTT2_get(HTT2 *htt, char *name)
187 : : {
188 : 0 : return internal_get((HTT *)htt, name, 1);
189 : : }
190 : :
191 : : /*
192 : : * Make an item identifier from its name and its hash value.
193 : : */
194 : 253892 : static char *make_ident(char *name, unsigned u)
195 : : {
196 : 253892 : size_t n = strlen(name) + 1;
197 : 253892 : char *ident = getmem(n + sizeof(unsigned));
198 : :
199 : 253892 : *(unsigned *)ident = u & ~1U;
200 : 253892 : memcpy(ident + sizeof(unsigned), name, n);
201 : 253892 : return ident;
202 : : }
203 : :
204 : : /*
205 : : * Make an identifier for a fake item, pointing to a true item.
206 : : */
207 : 0 : static char *make_fake_ident(unsigned u, hash_item_header *next)
208 : : {
209 : 0 : char *ident = getmem(PTR_SHIFT + sizeof(hash_item_header *));
210 : :
211 : 0 : *(unsigned *)ident = u | 1U;
212 : 0 : *(hash_item_header **)(ident + PTR_SHIFT) = next;
213 : 0 : return ident;
214 : : }
215 : :
216 : : /*
217 : : * Adding an item is straightforward:
218 : : * 1. look for its emplacement
219 : : * 2. if no node is found, use the item as a new node and link it to the tree
220 : : * 3. if a node is found:
221 : : * 3.1. if the node is real, check for name inequality, then create a
222 : : * fake node and assemble the two-element linked list
223 : : * 3.2. if the node is fake, look for the name in the list; if not found,
224 : : * add the node at the list end
225 : : */
226 : 253892 : static void *internal_put(HTT *htt, void *item, char *name, int reduced)
227 : : {
228 : 253892 : unsigned u = hash_string(name), v;
229 : : int ls;
230 : : hash_item_header *father;
231 : 253892 : hash_item_header *node = find_node(htt, u, &father, &ls, reduced);
232 : 253892 : hash_item_header *itemg = item, *pnode;
233 : :
234 [ + - ]: 253892 : if (node == NULL) {
235 : 253892 : itemg->left = itemg->right = NULL;
236 : 253892 : itemg->ident = make_ident(name, u);
237 [ + + ]: 253892 : if (father == NULL) {
238 [ - + ]: 91978 : TREE(u) = itemg;
239 [ + + ]: 161914 : } else if (ls) {
240 : 86988 : father->left = itemg;
241 : : } else {
242 : 74926 : father->right = itemg;
243 : : }
244 : 253892 : return NULL;
245 : : }
246 : 0 : v = *(unsigned *)(node->ident);
247 [ # # ]: 0 : if ((v & 1U) == 0) {
248 [ # # ]: 0 : if (strcmp(HASH_ITEM_NAME(node), name) == 0)
249 : 0 : return node;
250 : 0 : pnode = getmem(sizeof *pnode);
251 : 0 : pnode->left = node->left;
252 : 0 : pnode->right = node->right;
253 : 0 : pnode->ident = make_fake_ident(u, node);
254 : 0 : node->left = itemg;
255 : 0 : node->right = NULL;
256 : 0 : itemg->left = itemg->right = NULL;
257 : 0 : itemg->ident = make_ident(name, u);
258 [ # # ]: 0 : if (father == NULL) {
259 [ # # ]: 0 : TREE(u) = pnode;
260 [ # # ]: 0 : } else if (ls) {
261 : 0 : father->left = pnode;
262 : : } else {
263 : 0 : father->right = pnode;
264 : : }
265 : 0 : return NULL;
266 : : }
267 : 0 : node = *(hash_item_header **)(node->ident + PTR_SHIFT);
268 [ # # ]: 0 : while (node != NULL) {
269 [ # # ]: 0 : if (strcmp(HASH_ITEM_NAME(node), name) == 0) return node;
270 : 0 : pnode = node;
271 : 0 : node = node->left;
272 : : }
273 : 0 : itemg->left = itemg->right = NULL;
274 : 0 : itemg->ident = make_ident(name, u);
275 : 0 : pnode->left = itemg;
276 : 253892 : return NULL;
277 : : }
278 : :
279 : : /* see nhash.h */
280 : 253892 : void *HTT_put(HTT *htt, void *item, char *name)
281 : : {
282 : 253892 : return internal_put(htt, item, name, 0);
283 : : }
284 : :
285 : : /* see nhash.h */
286 : 0 : void *HTT2_put(HTT2 *htt, void *item, char *name)
287 : : {
288 : 0 : return internal_put((HTT *)htt, item, name, 1);
289 : : }
290 : :
291 : : /*
292 : : * A fake node subnode list has shrunk to one item only; make the
293 : : * node real again.
294 : : * fnode the fake node
295 : : * node the last remaining node
296 : : * father the fake node father (NULL if the fake node is root)
297 : : * leftson 1 if the fake node is a left son, 0 otehrwise
298 : : * u the hash value for this node
299 : : */
300 : 0 : static void shrink_node(HTT *htt, hash_item_header *fnode,
301 : : hash_item_header *node, hash_item_header *father, int leftson,
302 : : unsigned u, int reduced)
303 : : {
304 : 0 : node->left = fnode->left;
305 : 0 : node->right = fnode->right;
306 [ # # ]: 0 : if (father == NULL) {
307 [ # # ]: 0 : TREE(u) = node;
308 [ # # ]: 0 : } else if (leftson) {
309 : 0 : father->left = node;
310 : : } else {
311 : 0 : father->right = node;
312 : : }
313 : 0 : freemem(fnode->ident);
314 : 0 : freemem(fnode);
315 : 0 : }
316 : :
317 : : /*
318 : : * Deletion algorithm:
319 : : * 1. look for the node; if not found, exit
320 : : * 2. if the node is real:
321 : : * 2.1. check for equality; exit otherwise
322 : : * 2.2. delete the node
323 : : * 2.3. promote the leftest of right descendants or rightest of left
324 : : * descendants
325 : : * 3. if the node is fake:
326 : : * 3.1. check the list items for equality; exit otherwise
327 : : * 3.2. delete the correct item
328 : : * 3.3. if there remains only one item, supress the fake node
329 : : */
330 : 0 : static int internal_del(HTT *htt, char *name, int reduced)
331 : : {
332 : 0 : unsigned u = hash_string(name), v;
333 : : int ls;
334 : : hash_item_header *father;
335 : 0 : hash_item_header *node = find_node(htt, u, &father, &ls, reduced);
336 : : hash_item_header *pnode, *fnode, *znode;
337 : : char *tmp;
338 : :
339 [ # # ]: 0 : if (node == NULL) return 0;
340 : 0 : v = *(unsigned *)(node->ident);
341 [ # # ]: 0 : if ((v & 1U) != 0) {
342 : 0 : fnode = node;
343 : 0 : node = znode = *(hash_item_header **)(node->ident + PTR_SHIFT);
344 : 0 : pnode = NULL;
345 [ # # ]: 0 : while (node != NULL) {
346 [ # # ]: 0 : if (strcmp(HASH_ITEM_NAME(node), name) == 0) break;
347 : 0 : pnode = node;
348 : 0 : node = node->left;
349 : : }
350 [ # # ]: 0 : if (node == NULL) return 0;
351 [ # # ]: 0 : if (pnode == NULL) {
352 : : /*
353 : : * We supress the first item in the list.
354 : : */
355 : 0 : *(hash_item_header **)(fnode->ident + PTR_SHIFT) =
356 : 0 : node->left;
357 [ # # ]: 0 : if (node->left->left == NULL) {
358 : 0 : shrink_node(htt, fnode, node->left,
359 : : father, ls, u, reduced);
360 : : }
361 : : } else {
362 : 0 : pnode->left = node->left;
363 [ # # ][ # # ]: 0 : if (pnode->left == NULL && znode == pnode) {
364 : 0 : shrink_node(htt, fnode, pnode,
365 : : father, ls, u, reduced);
366 : : }
367 : : }
368 : : } else {
369 [ # # ]: 0 : if (strcmp(HASH_ITEM_NAME(node), name) != 0) return 0;
370 [ # # ]: 0 : if (node->left != NULL) {
371 [ # # ]: 0 : for (znode = node, pnode = node->left; pnode->right;
372 : 0 : znode = pnode, pnode = pnode->right);
373 [ # # ]: 0 : if (znode != node) {
374 : 0 : znode->right = pnode->left;
375 : 0 : pnode->left = node->left;
376 : : }
377 : 0 : pnode->right = node->right;
378 [ # # ]: 0 : } else if (node->right != NULL) {
379 [ # # ]: 0 : for (znode = node, pnode = node->right; pnode->left;
380 : 0 : znode = pnode, pnode = pnode->left);
381 [ # # ]: 0 : if (znode != node) {
382 : 0 : znode->left = pnode->right;
383 : 0 : pnode->right = node->right;
384 : : }
385 : 0 : pnode->left = node->left;
386 : 0 : } else pnode = NULL;
387 [ # # ]: 0 : if (father == NULL) {
388 [ # # ]: 0 : TREE(u) = pnode;
389 [ # # ]: 0 : } else if (ls) {
390 : 0 : father->left = pnode;
391 : : } else {
392 : 0 : father->right = pnode;
393 : : }
394 : : }
395 : 0 : tmp = node->ident;
396 : 0 : htt->deldata(node);
397 : 0 : freemem(tmp);
398 : 0 : return 1;
399 : : }
400 : :
401 : : /* see nhash.h */
402 : 0 : int HTT_del(HTT *htt, char *name)
403 : : {
404 : 0 : return internal_del(htt, name, 0);
405 : : }
406 : :
407 : : /* see nhash.h */
408 : 0 : int HTT2_del(HTT2 *htt, char *name)
409 : : {
410 : 0 : return internal_del((HTT *)htt, name, 1);
411 : : }
412 : :
413 : : /*
414 : : * Apply `action()' on all nodes of the tree whose root is given as
415 : : * parameter `node'. If `wipe' is non-zero, the nodes are removed
416 : : * from memory.
417 : : */
418 : 3568520 : static void scan_node(hash_item_header *node, void (*action)(void *), int wipe)
419 : : {
420 : : unsigned v;
421 : :
422 [ + + ]: 3822412 : if (node == NULL) return;
423 : 253892 : scan_node(node->left, action, wipe);
424 : 253892 : scan_node(node->right, action, wipe);
425 : 253892 : v = *(unsigned *)(node->ident);
426 [ - + ]: 253892 : if ((v & 1U) != 0) {
427 : : hash_item_header *pnode, *nnode;
428 : :
429 [ # # ]: 0 : for (pnode = *(hash_item_header **)(node->ident + PTR_SHIFT);
430 : 0 : pnode != NULL; pnode = nnode) {
431 : 0 : char *tmp = pnode->ident;
432 : :
433 : 0 : nnode = pnode->left;
434 : 0 : action(pnode);
435 [ # # ]: 0 : if (wipe) freemem(tmp);
436 : : }
437 [ # # ]: 0 : if (wipe) {
438 : 0 : freemem(node->ident);
439 : 0 : freemem(node);
440 : : }
441 : : } else {
442 : 253892 : char *tmp = node->ident;
443 : :
444 : 253892 : action(node);
445 [ + - ]: 253892 : if (wipe) freemem(tmp);
446 : : }
447 : : }
448 : :
449 : : /* see nhash.h */
450 : 0 : void HTT_scan(HTT *htt, void (*action)(void *))
451 : : {
452 : : unsigned u;
453 : :
454 [ # # ]: 0 : for (u = 0; u < HTT_NUM_TREES; u ++) {
455 : 0 : scan_node(htt->tree[u], action, 0);
456 : : }
457 : 0 : }
458 : :
459 : : /* see nhash.h */
460 : 0 : void HTT2_scan(HTT2 *htt, void (*action)(void *))
461 : : {
462 : 0 : scan_node(htt->tree[0], action, 0);
463 : 0 : scan_node(htt->tree[1], action, 0);
464 : 0 : }
465 : :
466 : : /* see nhash.h */
467 : 23912 : void HTT_kill(HTT *htt)
468 : : {
469 : : unsigned u;
470 : :
471 [ + + ]: 3084648 : for (u = 0; u < HTT_NUM_TREES; u ++) {
472 : 3060736 : scan_node(htt->tree[u], htt->deldata, 1);
473 : : }
474 : 23912 : }
475 : :
476 : : /* see nhash.h */
477 : 0 : void HTT2_kill(HTT2 *htt)
478 : : {
479 : 0 : scan_node(htt->tree[0], htt->deldata, 1);
480 : 0 : scan_node(htt->tree[1], htt->deldata, 1);
481 : 0 : }
|