xref: /openbmc/u-boot/lib/hashtable.c (revision f6c019c4)
1 /*
2  * This implementation is based on code from uClibc-0.9.30.3 but was
3  * modified and extended for use within U-Boot.
4  *
5  * Copyright (C) 2010 Wolfgang Denk <wd@denx.de>
6  *
7  * Original license header:
8  *
9  * Copyright (C) 1993, 1995, 1996, 1997, 2002 Free Software Foundation, Inc.
10  * This file is part of the GNU C Library.
11  * Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1993.
12  *
13  * The GNU C Library is free software; you can redistribute it and/or
14  * modify it under the terms of the GNU Lesser General Public
15  * License as published by the Free Software Foundation; either
16  * version 2.1 of the License, or (at your option) any later version.
17  *
18  * The GNU C Library is distributed in the hope that it will be useful,
19  * but WITHOUT ANY WARRANTY; without even the implied warranty of
20  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21  * Lesser General Public License for more details.
22  *
23  * You should have received a copy of the GNU Lesser General Public
24  * License along with the GNU C Library; if not, write to the Free
25  * Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
26  * 02111-1307 USA.
27  */
28 
29 #include <errno.h>
30 #include <malloc.h>
31 
32 #ifdef USE_HOSTCC		/* HOST build */
33 # include <string.h>
34 # include <assert.h>
35 
36 # ifndef debug
37 #  ifdef DEBUG
38 #   define debug(fmt,args...)	printf(fmt ,##args)
39 #  else
40 #   define debug(fmt,args...)
41 #  endif
42 # endif
43 #else				/* U-Boot build */
44 # include <common.h>
45 # include <linux/string.h>
46 #endif
47 
48 #ifndef	CONFIG_ENV_MIN_ENTRIES	/* minimum number of entries */
49 #define	CONFIG_ENV_MIN_ENTRIES 64
50 #endif
51 #ifndef	CONFIG_ENV_MAX_ENTRIES	/* maximum number of entries */
52 #define	CONFIG_ENV_MAX_ENTRIES 512
53 #endif
54 
55 #include "search.h"
56 
57 /*
58  * [Aho,Sethi,Ullman] Compilers: Principles, Techniques and Tools, 1986
59  * [Knuth]	      The Art of Computer Programming, part 3 (6.4)
60  */
61 
62 /*
63  * The reentrant version has no static variables to maintain the state.
64  * Instead the interface of all functions is extended to take an argument
65  * which describes the current status.
66  */
67 typedef struct _ENTRY {
68 	int used;
69 	ENTRY entry;
70 } _ENTRY;
71 
72 
73 /*
74  * hcreate()
75  */
76 
/*
 * The double hashing scheme needs a prime table size, so the size given
 * by the caller is rounded up to a prime; this is the primality test
 * used for that.  Plain trial division is adequate because
 * a)  the code is (most probably) called a few times per program run and
 * b)  the number is small because the table must fit in memory.
 *
 * @number: candidate to test; callers only pass odd numbers, so only odd
 *          divisors are tried
 *
 * Returns 1 if no odd divisor d with 3 <= d and d * d <= number divides
 * @number, and 0 otherwise.  In particular 3 is reported as prime.
 */
static int isprime(unsigned int number)
{
	unsigned int div;

	/*
	 * Test "div <= number / div" rather than "div * div <= number":
	 * the product wraps around for candidates close to UINT_MAX, which
	 * would keep the loop running far beyond sqrt(number) and report
	 * large primes as composite.
	 */
	for (div = 3; div <= number / div; div += 2) {
		if (number % div == 0)
			return 0;
	}

	return 1;
}
94 
95 /*
96  * Before using the hash table we must allocate memory for it.
97  * Test for an existing table are done. We allocate one element
98  * more as the found prime number says. This is done for more effective
99  * indexing as explained in the comment for the hsearch function.
100  * The contents of the table is zeroed, especially the field used
101  * becomes zero.
102  */
103 
104 int hcreate_r(size_t nel, struct hsearch_data *htab)
105 {
106 	/* Test for correct arguments.  */
107 	if (htab == NULL) {
108 		__set_errno(EINVAL);
109 		return 0;
110 	}
111 
112 	/* There is still another table active. Return with error. */
113 	if (htab->table != NULL)
114 		return 0;
115 
116 	/* Change nel to the first prime number not smaller as nel. */
117 	nel |= 1;		/* make odd */
118 	while (!isprime(nel))
119 		nel += 2;
120 
121 	htab->size = nel;
122 	htab->filled = 0;
123 
124 	/* allocate memory and zero out */
125 	htab->table = (_ENTRY *) calloc(htab->size + 1, sizeof(_ENTRY));
126 	if (htab->table == NULL)
127 		return 0;
128 
129 	/* everything went alright */
130 	return 1;
131 }
132 
133 
134 /*
135  * hdestroy()
136  */
137 
138 /*
139  * After using the hash table it has to be destroyed. The used memory can
140  * be freed and the local static variable can be marked as not used.
141  */
142 
143 void hdestroy_r(struct hsearch_data *htab)
144 {
145 	int i;
146 
147 	/* Test for correct arguments.  */
148 	if (htab == NULL) {
149 		__set_errno(EINVAL);
150 		return;
151 	}
152 
153 	/* free used memory */
154 	for (i = 1; i <= htab->size; ++i) {
155 		if (htab->table[i].used > 0) {
156 			ENTRY *ep = &htab->table[i].entry;
157 
158 			free((void *)ep->key);
159 			free(ep->data);
160 		}
161 	}
162 	free(htab->table);
163 
164 	/* the sign for an existing table is an value != NULL in htable */
165 	htab->table = NULL;
166 }
167 
168 /*
169  * hsearch()
170  */
171 
172 /*
173  * This is the search function. It uses double hashing with open addressing.
174  * The argument item.key has to be a pointer to an zero terminated, most
175  * probably strings of chars. The function for generating a number of the
176  * strings is simple but fast. It can be replaced by a more complex function
177  * like ajw (see [Aho,Sethi,Ullman]) if the needs are shown.
178  *
 * We use a trick to speed up the lookup. The table is created by hcreate
 * with one more element available. This enables us to treat index zero
 * specially. This index will never be used because we store the first hash
 * index in the field used, where zero means not used. Every other value
 * means used. The used field can serve as a first fast comparison for
 * equality of the stored and the parameter value. This helps to prevent
 * unnecessarily expensive calls of strcmp.
186  *
187  * This implementation differs from the standard library version of
188  * this function in a number of ways:
189  *
190  * - While the standard version does not make any assumptions about
191  *   the type of the stored data objects at all, this implementation
192  *   works with NUL terminated strings only.
193  * - Instead of storing just pointers to the original objects, we
194  *   create local copies so the caller does not need to care about the
195  *   data any more.
196  * - The standard implementation does not provide a way to update an
197  *   existing entry.  This version will create a new entry or update an
198  *   existing one when both "action == ENTER" and "item.data != NULL".
199  * - Instead of returning 1 on success, we return the index into the
200  *   internal hash table, which is also guaranteed to be positive.
201  *   This allows us direct access to the found hash table slot for
202  *   example for functions like hdelete().
203  */
204 
205 /*
206  * hstrstr_r - return index to entry whose key and/or data contains match
207  */
208 int hstrstr_r(const char *match, int last_idx, ENTRY ** retval,
209 	      struct hsearch_data *htab)
210 {
211 	unsigned int idx;
212 
213 	for (idx = last_idx + 1; idx < htab->size; ++idx) {
214 		if (htab->table[idx].used <= 0)
215 			continue;
216 		if (strstr(htab->table[idx].entry.key, match) ||
217 		    strstr(htab->table[idx].entry.data, match)) {
218 			*retval = &htab->table[idx].entry;
219 			return idx;
220 		}
221 	}
222 
223 	__set_errno(ESRCH);
224 	*retval = NULL;
225 	return 0;
226 }
227 
228 int hmatch_r(const char *match, int last_idx, ENTRY ** retval,
229 	     struct hsearch_data *htab)
230 {
231 	unsigned int idx;
232 	size_t key_len = strlen(match);
233 
234 	for (idx = last_idx + 1; idx < htab->size; ++idx) {
235 		if (htab->table[idx].used <= 0)
236 			continue;
237 		if (!strncmp(match, htab->table[idx].entry.key, key_len)) {
238 			*retval = &htab->table[idx].entry;
239 			return idx;
240 		}
241 	}
242 
243 	__set_errno(ESRCH);
244 	*retval = NULL;
245 	return 0;
246 }
247 
248 int hsearch_r(ENTRY item, ACTION action, ENTRY ** retval,
249 	      struct hsearch_data *htab)
250 {
251 	unsigned int hval;
252 	unsigned int count;
253 	unsigned int len = strlen(item.key);
254 	unsigned int idx;
255 	unsigned int first_deleted = 0;
256 
257 	/* Compute an value for the given string. Perhaps use a better method. */
258 	hval = len;
259 	count = len;
260 	while (count-- > 0) {
261 		hval <<= 4;
262 		hval += item.key[count];
263 	}
264 
265 	/*
266 	 * First hash function:
267 	 * simply take the modul but prevent zero.
268 	 */
269 	hval %= htab->size;
270 	if (hval == 0)
271 		++hval;
272 
273 	/* The first index tried. */
274 	idx = hval;
275 
276 	if (htab->table[idx].used) {
277 		/*
278 		 * Further action might be required according to the
279 		 * action value.
280 		 */
281 		unsigned hval2;
282 
283 		if (htab->table[idx].used == -1
284 		    && !first_deleted)
285 			first_deleted = idx;
286 
287 		if (htab->table[idx].used == hval
288 		    && strcmp(item.key, htab->table[idx].entry.key) == 0) {
289 			/* Overwrite existing value? */
290 			if ((action == ENTER) && (item.data != NULL)) {
291 				free(htab->table[idx].entry.data);
292 				htab->table[idx].entry.data =
293 					strdup(item.data);
294 				if (!htab->table[idx].entry.data) {
295 					__set_errno(ENOMEM);
296 					*retval = NULL;
297 					return 0;
298 				}
299 			}
300 			/* return found entry */
301 			*retval = &htab->table[idx].entry;
302 			return idx;
303 		}
304 
305 		/*
306 		 * Second hash function:
307 		 * as suggested in [Knuth]
308 		 */
309 		hval2 = 1 + hval % (htab->size - 2);
310 
311 		do {
312 			/*
313 			 * Because SIZE is prime this guarantees to
314 			 * step through all available indices.
315 			 */
316 			if (idx <= hval2)
317 				idx = htab->size + idx - hval2;
318 			else
319 				idx -= hval2;
320 
321 			/*
322 			 * If we visited all entries leave the loop
323 			 * unsuccessfully.
324 			 */
325 			if (idx == hval)
326 				break;
327 
328 			/* If entry is found use it. */
329 			if ((htab->table[idx].used == hval)
330 			    && strcmp(item.key, htab->table[idx].entry.key) == 0) {
331 				/* Overwrite existing value? */
332 				if ((action == ENTER) && (item.data != NULL)) {
333 					free(htab->table[idx].entry.data);
334 					htab->table[idx].entry.data =
335 						strdup(item.data);
336 					if (!htab->table[idx].entry.data) {
337 						__set_errno(ENOMEM);
338 						*retval = NULL;
339 						return 0;
340 					}
341 				}
342 				/* return found entry */
343 				*retval = &htab->table[idx].entry;
344 				return idx;
345 			}
346 		}
347 		while (htab->table[idx].used);
348 	}
349 
350 	/* An empty bucket has been found. */
351 	if (action == ENTER) {
352 		/*
353 		 * If table is full and another entry should be
354 		 * entered return with error.
355 		 */
356 		if (htab->filled == htab->size) {
357 			__set_errno(ENOMEM);
358 			*retval = NULL;
359 			return 0;
360 		}
361 
362 		/*
363 		 * Create new entry;
364 		 * create copies of item.key and item.data
365 		 */
366 		if (first_deleted)
367 			idx = first_deleted;
368 
369 		htab->table[idx].used = hval;
370 		htab->table[idx].entry.key = strdup(item.key);
371 		htab->table[idx].entry.data = strdup(item.data);
372 		if (!htab->table[idx].entry.key ||
373 		    !htab->table[idx].entry.data) {
374 			__set_errno(ENOMEM);
375 			*retval = NULL;
376 			return 0;
377 		}
378 
379 		++htab->filled;
380 
381 		/* return new entry */
382 		*retval = &htab->table[idx].entry;
383 		return 1;
384 	}
385 
386 	__set_errno(ESRCH);
387 	*retval = NULL;
388 	return 0;
389 }
390 
391 
392 /*
393  * hdelete()
394  */
395 
396 /*
397  * The standard implementation of hsearch(3) does not provide any way
398  * to delete any entries from the hash table.  We extend the code to
399  * do that.
400  */
401 
402 int hdelete_r(const char *key, struct hsearch_data *htab)
403 {
404 	ENTRY e, *ep;
405 	int idx;
406 
407 	debug("hdelete: DELETE key \"%s\"\n", key);
408 
409 	e.key = (char *)key;
410 
411 	if ((idx = hsearch_r(e, FIND, &ep, htab)) == 0) {
412 		__set_errno(ESRCH);
413 		return 0;	/* not found */
414 	}
415 
416 	/* free used ENTRY */
417 	debug("hdelete: DELETING key \"%s\"\n", key);
418 
419 	free((void *)ep->key);
420 	free(ep->data);
421 	htab->table[idx].used = -1;
422 
423 	--htab->filled;
424 
425 	return 1;
426 }
427 
428 /*
429  * hexport()
430  */
431 
432 /*
433  * Export the data stored in the hash table in linearized form.
434  *
435  * Entries are exported as "name=value" strings, separated by an
436  * arbitrary (non-NUL, of course) separator character. This allows to
437  * use this function both when formatting the U-Boot environment for
438  * external storage (using '\0' as separator), but also when using it
 * for the "printenv" command to print all variables, simply by using
 * '\n' as separator. This can also be used for new features like
441  * exporting the environment data as text file, including the option
442  * for later re-import.
443  *
444  * The entries in the result list will be sorted by ascending key
445  * values.
446  *
 * Backslash characters in the values are always escaped by a preceding
 * backslash in the output. If the separator character is different from
 * NUL, separator characters in the values are escaped the same way. This
 * is needed for example to enable multi-line values, especially when the
 * output shall later be parsed (for example, for re-import).
452  *
453  * There are several options how the result buffer is handled:
454  *
455  * *resp  size
456  * -----------
457  *  NULL    0	A string of sufficient length will be allocated.
458  *  NULL   >0	A string of the size given will be
459  *		allocated. An error will be returned if the size is
460  *		not sufficient.  Any unused bytes in the string will
461  *		be '\0'-padded.
462  * !NULL    0	The user-supplied buffer will be used. No length
463  *		checking will be performed, i. e. it is assumed that
464  *		the buffer size will always be big enough. DANGEROUS.
465  * !NULL   >0	The user-supplied buffer will be used. An error will
466  *		be returned if the size is not sufficient.  Any unused
467  *		bytes in the string will be '\0'-padded.
468  */
469 
470 static int cmpkey(const void *p1, const void *p2)
471 {
472 	ENTRY *e1 = *(ENTRY **) p1;
473 	ENTRY *e2 = *(ENTRY **) p2;
474 
475 	return (strcmp(e1->key, e2->key));
476 }
477 
478 ssize_t hexport_r(struct hsearch_data *htab, const char sep,
479 		 char **resp, size_t size)
480 {
481 	ENTRY *list[htab->size];
482 	char *res, *p;
483 	size_t totlen;
484 	int i, n;
485 
486 	/* Test for correct arguments.  */
487 	if ((resp == NULL) || (htab == NULL)) {
488 		__set_errno(EINVAL);
489 		return (-1);
490 	}
491 
492 	debug("EXPORT  table = %p, htab.size = %d, htab.filled = %d, size = %d\n",
493 		htab, htab->size, htab->filled, size);
494 	/*
495 	 * Pass 1:
496 	 * search used entries,
497 	 * save addresses and compute total length
498 	 */
499 	for (i = 1, n = 0, totlen = 0; i <= htab->size; ++i) {
500 
501 		if (htab->table[i].used > 0) {
502 			ENTRY *ep = &htab->table[i].entry;
503 
504 			list[n++] = ep;
505 
506 			totlen += strlen(ep->key) + 2;
507 
508 			if (sep == '\0') {
509 				totlen += strlen(ep->data);
510 			} else {	/* check if escapes are needed */
511 				char *s = ep->data;
512 
513 				while (*s) {
514 					++totlen;
515 					/* add room for needed escape chars */
516 					if ((*s == sep) || (*s == '\\'))
517 						++totlen;
518 					++s;
519 				}
520 			}
521 			totlen += 2;	/* for '=' and 'sep' char */
522 		}
523 	}
524 
525 #ifdef DEBUG
526 	/* Pass 1a: print unsorted list */
527 	printf("Unsorted: n=%d\n", n);
528 	for (i = 0; i < n; ++i) {
529 		printf("\t%3d: %p ==> %-10s => %s\n",
530 		       i, list[i], list[i]->key, list[i]->data);
531 	}
532 #endif
533 
534 	/* Sort list by keys */
535 	qsort(list, n, sizeof(ENTRY *), cmpkey);
536 
537 	/* Check if the user supplied buffer size is sufficient */
538 	if (size) {
539 		if (size < totlen + 1) {	/* provided buffer too small */
540 			debug("### buffer too small: %d, but need %d\n",
541 				size, totlen + 1);
542 			__set_errno(ENOMEM);
543 			return (-1);
544 		}
545 	} else {
546 		size = totlen + 1;
547 	}
548 
549 	/* Check if the user provided a buffer */
550 	if (*resp) {
551 		/* yes; clear it */
552 		res = *resp;
553 		memset(res, '\0', size);
554 	} else {
555 		/* no, allocate and clear one */
556 		*resp = res = calloc(1, size);
557 		if (res == NULL) {
558 			__set_errno(ENOMEM);
559 			return (-1);
560 		}
561 	}
562 	/*
563 	 * Pass 2:
564 	 * export sorted list of result data
565 	 */
566 	for (i = 0, p = res; i < n; ++i) {
567 		const char *s;
568 
569 		s = list[i]->key;
570 		while (*s)
571 			*p++ = *s++;
572 		*p++ = '=';
573 
574 		s = list[i]->data;
575 
576 		while (*s) {
577 			if ((*s == sep) || (*s == '\\'))
578 				*p++ = '\\';	/* escape */
579 			*p++ = *s++;
580 		}
581 		*p++ = sep;
582 	}
583 	*p = '\0';		/* terminate result */
584 
585 	return size;
586 }
587 
588 
589 /*
590  * himport()
591  */
592 
593 /*
594  * Import linearized data into hash table.
595  *
596  * This is the inverse function to hexport(): it takes a linear list
597  * of "name=value" pairs and creates hash table entries from it.
598  *
599  * Entries without "value", i. e. consisting of only "name" or
600  * "name=", will cause this entry to be deleted from the hash table.
601  *
602  * The "flag" argument can be used to control the behaviour: when the
603  * H_NOCLEAR bit is set, then an existing hash table will kept, i. e.
604  * new data will be added to an existing hash table; otherwise, old
605  * data will be discarded and a new hash table will be created.
606  *
607  * The separator character for the "name=value" pairs can be selected,
608  * so we both support importing from externally stored environment
609  * data (separated by NUL characters) and from plain text files
610  * (entries separated by newline characters).
611  *
612  * To allow for nicely formatted text input, leading white space
613  * (sequences of SPACE and TAB chars) is ignored, and entries starting
614  * (after removal of any leading white space) with a '#' character are
615  * considered comments and ignored.
616  *
617  * [NOTE: this means that a variable name cannot start with a '#'
618  * character.]
619  *
620  * When using a non-NUL separator character, backslash is used as
621  * escape character in the value part, allowing for example for
622  * multi-line values.
623  *
624  * In theory, arbitrary separator characters can be used, but only
625  * '\0' and '\n' have really been tested.
626  */
627 
628 int himport_r(struct hsearch_data *htab,
629 	      const char *env, size_t size, const char sep, int flag)
630 {
631 	char *data, *sp, *dp, *name, *value;
632 
633 	/* Test for correct arguments.  */
634 	if (htab == NULL) {
635 		__set_errno(EINVAL);
636 		return 0;
637 	}
638 
639 	/* we allocate new space to make sure we can write to the array */
640 	if ((data = malloc(size)) == NULL) {
641 		debug("himport_r: can't malloc %d bytes\n", size);
642 		__set_errno(ENOMEM);
643 		return 0;
644 	}
645 	memcpy(data, env, size);
646 	dp = data;
647 
648 	if ((flag & H_NOCLEAR) == 0) {
649 		/* Destroy old hash table if one exists */
650 		debug("Destroy Hash Table: %p table = %p\n", htab,
651 		       htab->table);
652 		if (htab->table)
653 			hdestroy_r(htab);
654 	}
655 
656 	/*
657 	 * Create new hash table (if needed).  The computation of the hash
658 	 * table size is based on heuristics: in a sample of some 70+
659 	 * existing systems we found an average size of 39+ bytes per entry
660 	 * in the environment (for the whole key=value pair). Assuming a
661 	 * size of 8 per entry (= safety factor of ~5) should provide enough
662 	 * safety margin for any existing environment definitions and still
663 	 * allow for more than enough dynamic additions. Note that the
664 	 * "size" argument is supposed to give the maximum enviroment size
665 	 * (CONFIG_ENV_SIZE).  This heuristics will result in
666 	 * unreasonably large numbers (and thus memory footprint) for
667 	 * big flash environments (>8,000 entries for 64 KB
668 	 * envrionment size), so we clip it to a reasonable value.
669 	 * On the other hand we need to add some more entries for free
670 	 * space when importing very small buffers. Both boundaries can
671 	 * be overwritten in the board config file if needed.
672 	 */
673 
674 	if (!htab->table) {
675 		int nent = CONFIG_ENV_MIN_ENTRIES + size / 8;
676 
677 		if (nent > CONFIG_ENV_MAX_ENTRIES)
678 			nent = CONFIG_ENV_MAX_ENTRIES;
679 
680 		debug("Create Hash Table: N=%d\n", nent);
681 
682 		if (hcreate_r(nent, htab) == 0) {
683 			free(data);
684 			return 0;
685 		}
686 	}
687 
688 	/* Parse environment; allow for '\0' and 'sep' as separators */
689 	do {
690 		ENTRY e, *rv;
691 
692 		/* skip leading white space */
693 		while ((*dp == ' ') || (*dp == '\t'))
694 			++dp;
695 
696 		/* skip comment lines */
697 		if (*dp == '#') {
698 			while (*dp && (*dp != sep))
699 				++dp;
700 			++dp;
701 			continue;
702 		}
703 
704 		/* parse name */
705 		for (name = dp; *dp != '=' && *dp && *dp != sep; ++dp)
706 			;
707 
708 		/* deal with "name" and "name=" entries (delete var) */
709 		if (*dp == '\0' || *(dp + 1) == '\0' ||
710 		    *dp == sep || *(dp + 1) == sep) {
711 			if (*dp == '=')
712 				*dp++ = '\0';
713 			*dp++ = '\0';	/* terminate name */
714 
715 			debug("DELETE CANDIDATE: \"%s\"\n", name);
716 
717 			if (hdelete_r(name, htab) == 0)
718 				debug("DELETE ERROR ##############################\n");
719 
720 			continue;
721 		}
722 		*dp++ = '\0';	/* terminate name */
723 
724 		/* parse value; deal with escapes */
725 		for (value = sp = dp; *dp && (*dp != sep); ++dp) {
726 			if ((*dp == '\\') && *(dp + 1))
727 				++dp;
728 			*sp++ = *dp;
729 		}
730 		*sp++ = '\0';	/* terminate value */
731 		++dp;
732 
733 		/* enter into hash table */
734 		e.key = name;
735 		e.data = value;
736 
737 		hsearch_r(e, ENTER, &rv, htab);
738 		if (rv == NULL) {
739 			printf("himport_r: can't insert \"%s=%s\" into hash table\n",
740 				name, value);
741 			return 0;
742 		}
743 
744 		debug("INSERT: table %p, filled %d/%d rv %p ==> name=\"%s\" value=\"%s\"\n",
745 			htab, htab->filled, htab->size,
746 			rv, name, value);
747 	} while ((dp < data + size) && *dp);	/* size check needed for text */
748 						/* without '\0' termination */
749 	debug("INSERT: free(data = %p)\n", data);
750 	free(data);
751 
752 	debug("INSERT: done\n");
753 	return 1;		/* everything OK */
754 }
755