xref: /openbmc/u-boot/lib/hashtable.c (revision 544d97e9)
1 /*
2  * This implementation is based on code from uClibc-0.9.30.3 but was
3  * modified and extended for use within U-Boot.
4  *
5  * Copyright (C) 2010 Wolfgang Denk <wd@denx.de>
6  *
7  * Original license header:
8  *
9  * Copyright (C) 1993, 1995, 1996, 1997, 2002 Free Software Foundation, Inc.
10  * This file is part of the GNU C Library.
11  * Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1993.
12  *
13  * The GNU C Library is free software; you can redistribute it and/or
14  * modify it under the terms of the GNU Lesser General Public
15  * License as published by the Free Software Foundation; either
16  * version 2.1 of the License, or (at your option) any later version.
17  *
18  * The GNU C Library is distributed in the hope that it will be useful,
19  * but WITHOUT ANY WARRANTY; without even the implied warranty of
20  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21  * Lesser General Public License for more details.
22  *
23  * You should have received a copy of the GNU Lesser General Public
24  * License along with the GNU C Library; if not, write to the Free
25  * Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
26  * 02111-1307 USA.
27  */
28 
29 #include <errno.h>
30 #include <malloc.h>
31 
32 #ifdef USE_HOSTCC		/* HOST build */
33 # include <string.h>
34 # include <assert.h>
35 
36 # ifndef debug
37 #  ifdef DEBUG
38 #   define debug(fmt,args...)	printf(fmt ,##args)
39 #  else
40 #   define debug(fmt,args...)
41 #  endif
42 # endif
43 #else				/* U-Boot build */
44 # include <common.h>
45 # include <linux/string.h>
46 #endif
47 
48 #ifndef	CONFIG_ENV_MAX_ENTRIES	/* maximum number of entries */
49 #define	CONFIG_ENV_MAX_ENTRIES 512
50 #endif
51 
52 #include "search.h"
53 
54 /*
55  * [Aho,Sethi,Ullman] Compilers: Principles, Techniques and Tools, 1986
56  * [Knuth]            The Art of Computer Programming, part 3 (6.4)
57  */
58 
/*
 * The non-reentrant versions use a global space for storing the hash table.
 * This is the table state that hcreate(), hdestroy(), hsearch(), hdelete(),
 * hexport() and himport() hand to their reentrant *_r() counterparts.
 */
static struct hsearch_data htab;
63 
64 /*
65  * The reentrant version has no static variables to maintain the state.
66  * Instead the interface of all functions is extended to take an argument
67  * which describes the current status.
68  */
/* One slot of the hash table: the stored ENTRY plus its bookkeeping field. */
typedef struct _ENTRY {
	unsigned int used;	/* 0: slot is free; else the hash value set by hsearch_r() */
	ENTRY entry;		/* key/data pair stored in this slot */
} _ENTRY;
73 
74 
75 /*
76  * hcreate()
77  */
78 
/*
 * For the double hash method used here the table size has to be a prime.
 * To correct the table size given by the user we need a prime test.  This
 * trivial trial division is adequate because
 * a)  the code is (most probably) called a few times per program run and
 * b)  the number is small because the table must fit in memory.
 *
 * Only odd numbers may be passed in; divisibility by 2 is not tested.
 *
 * Returns 1 if no odd divisor in the range [3, sqrt(number)] exists,
 * otherwise 0.  Note that 1 is reported as prime.
 */
static int isprime(unsigned int number)
{
	unsigned int div;

	/*
	 * "div <= number / div" is the overflow-free spelling of
	 * "div * div <= number": the product could wrap around for large
	 * inputs, which made the test run (almost) forever and return a
	 * wrong result.  Stopping at the square root also keeps div from
	 * ever reaching the number itself, so 3 is correctly prime.
	 */
	for (div = 3; div <= number / div; div += 2) {
		if (number % div == 0)
			return 0;
	}

	return 1;
}
96 
97 int hcreate(size_t nel)
98 {
99 	return hcreate_r(nel, &htab);
100 }
101 
102 /*
103  * Before using the hash table we must allocate memory for it.
104  * Test for an existing table are done. We allocate one element
105  * more as the found prime number says. This is done for more effective
106  * indexing as explained in the comment for the hsearch function.
107  * The contents of the table is zeroed, especially the field used
108  * becomes zero.
109  */
110 int hcreate_r(size_t nel, struct hsearch_data *htab)
111 {
112 	/* Test for correct arguments.  */
113 	if (htab == NULL) {
114 		__set_errno(EINVAL);
115 		return 0;
116 	}
117 
118 	/* There is still another table active. Return with error. */
119 	if (htab->table != NULL)
120 		return 0;
121 
122 	/* Change nel to the first prime number not smaller as nel. */
123 	nel |= 1;		/* make odd */
124 	while (!isprime(nel))
125 		nel += 2;
126 
127 	htab->size = nel;
128 	htab->filled = 0;
129 
130 	/* allocate memory and zero out */
131 	htab->table = (_ENTRY *) calloc(htab->size + 1, sizeof(_ENTRY));
132 	if (htab->table == NULL)
133 		return 0;
134 
135 	/* everything went alright */
136 	return 1;
137 }
138 
139 
140 /*
141  * hdestroy()
142  */
143 void hdestroy(void)
144 {
145 	hdestroy_r(&htab);
146 }
147 
148 /*
149  * After using the hash table it has to be destroyed. The used memory can
150  * be freed and the local static variable can be marked as not used.
151  */
152 void hdestroy_r(struct hsearch_data *htab)
153 {
154 	int i;
155 
156 	/* Test for correct arguments.  */
157 	if (htab == NULL) {
158 		__set_errno(EINVAL);
159 		return;
160 	}
161 
162 	/* free used memory */
163 	for (i = 1; i <= htab->size; ++i) {
164 		if (htab->table[i].used) {
165 			ENTRY *ep = &htab->table[i].entry;
166 
167 			free(ep->key);
168 			free(ep->data);
169 		}
170 	}
171 	free(htab->table);
172 
173 	/* the sign for an existing table is an value != NULL in htable */
174 	htab->table = NULL;
175 }
176 
177 /*
178  * hsearch()
179  */
180 
181 /*
 * This is the search function. It uses double hashing with open addressing.
 * The argument item.key has to be a pointer to a zero-terminated string
 * of chars. The function used to compute a number from the string is
 * simple but fast. It can be replaced by a more complex function
 * like ajw (see [Aho,Sethi,Ullman]) if the need arises.
 *
 * We use a trick to speed up the lookup. The table is created by hcreate
 * with one more element available. This enables us to treat index zero
 * specially. This index will never be used because we store the first hash
 * index in the field "used", where zero means not used. Every other value
 * means used. The "used" field can serve as a first fast comparison for
 * equality of the stored and the parameter value. This helps to prevent
 * unnecessary expensive calls of strcmp.
 *
196  * This implementation differs from the standard library version of
197  * this function in a number of ways:
198  *
199  * - While the standard version does not make any assumptions about
200  *   the type of the stored data objects at all, this implementation
201  *   works with NUL terminated strings only.
202  * - Instead of storing just pointers to the original objects, we
203  *   create local copies so the caller does not need to care about the
204  *   data any more.
205  * - The standard implementation does not provide a way to update an
206  *   existing entry.  This version will create a new entry or update an
207  *   existing one when both "action == ENTER" and "item.data != NULL".
208  * - Instead of returning 1 on success, we return the index into the
209  *   internal hash table, which is also guaranteed to be positive.
210  *   This allows us direct access to the found hash table slot for
211  *   example for functions like hdelete().
212  */
213 
214 ENTRY *hsearch(ENTRY item, ACTION action)
215 {
216 	ENTRY *result;
217 
218 	(void) hsearch_r(item, action, &result, &htab);
219 
220 	return result;
221 }
222 
223 int hsearch_r(ENTRY item, ACTION action, ENTRY ** retval,
224 	      struct hsearch_data *htab)
225 {
226 	unsigned int hval;
227 	unsigned int count;
228 	unsigned int len = strlen(item.key);
229 	unsigned int idx;
230 
231 	/* Compute an value for the given string. Perhaps use a better method. */
232 	hval = len;
233 	count = len;
234 	while (count-- > 0) {
235 		hval <<= 4;
236 		hval += item.key[count];
237 	}
238 
239 	/*
240 	 * First hash function:
241 	 * simply take the modul but prevent zero.
242 	 */
243 	hval %= htab->size;
244 	if (hval == 0)
245 		++hval;
246 
247 	/* The first index tried. */
248 	idx = hval;
249 
250 	if (htab->table[idx].used) {
251 		/*
252                  * Further action might be required according to the
253 		 * action value.
254 		 */
255 		unsigned hval2;
256 
257 		if (htab->table[idx].used == hval
258 		    && strcmp(item.key, htab->table[idx].entry.key) == 0) {
259 			/* Overwrite existing value? */
260 			if ((action == ENTER) && (item.data != NULL)) {
261 				free(htab->table[idx].entry.data);
262 				htab->table[idx].entry.data =
263 					strdup(item.data);
264 				if (!htab->table[idx].entry.data) {
265 					__set_errno(ENOMEM);
266 					*retval = NULL;
267 					return 0;
268 				}
269 			}
270 			/* return found entry */
271 			*retval = &htab->table[idx].entry;
272 			return idx;
273 		}
274 
275 		/*
276 		 * Second hash function:
277 		 * as suggested in [Knuth]
278 		 */
279 		hval2 = 1 + hval % (htab->size - 2);
280 
281 		do {
282 			/*
283                          * Because SIZE is prime this guarantees to
284                          * step through all available indices.
285 			 */
286 			if (idx <= hval2)
287 				idx = htab->size + idx - hval2;
288 			else
289 				idx -= hval2;
290 
291 			/*
292 			 * If we visited all entries leave the loop
293 			 * unsuccessfully.
294 			 */
295 			if (idx == hval)
296 				break;
297 
298 			/* If entry is found use it. */
299 			if ((htab->table[idx].used == hval)
300 			    && strcmp(item.key, htab->table[idx].entry.key) == 0) {
301 				/* Overwrite existing value? */
302 				if ((action == ENTER) && (item.data != NULL)) {
303 					free(htab->table[idx].entry.data);
304 					htab->table[idx].entry.data =
305 						strdup(item.data);
306 					if (!htab->table[idx].entry.data) {
307 						__set_errno(ENOMEM);
308 						*retval = NULL;
309 						return 0;
310 					}
311 				}
312 				/* return found entry */
313 				*retval = &htab->table[idx].entry;
314 				return idx;
315 			}
316 		}
317 		while (htab->table[idx].used);
318 	}
319 
320 	/* An empty bucket has been found. */
321 	if (action == ENTER) {
322 		/*
323                  * If table is full and another entry should be
324                  * entered return with error.
325 		 */
326 		if (htab->filled == htab->size) {
327 			__set_errno(ENOMEM);
328 			*retval = NULL;
329 			return 0;
330 		}
331 
332 		/*
333 		 * Create new entry;
334 		 * create copies of item.key and item.data
335 		 */
336 		htab->table[idx].used = hval;
337 		htab->table[idx].entry.key = strdup(item.key);
338 		htab->table[idx].entry.data = strdup(item.data);
339 		if (!htab->table[idx].entry.key ||
340 		    !htab->table[idx].entry.data) {
341 			__set_errno(ENOMEM);
342 			*retval = NULL;
343 			return 0;
344 		}
345 
346 		++htab->filled;
347 
348 		/* return new entry */
349 		*retval = &htab->table[idx].entry;
350 		return 1;
351 	}
352 
353 	__set_errno(ESRCH);
354 	*retval = NULL;
355 	return 0;
356 }
357 
358 
359 /*
360  * hdelete()
361  */
362 
363 /*
364  * The standard implementation of hsearch(3) does not provide any way
365  * to delete any entries from the hash table.  We extend the code to
366  * do that.
367  */
368 
369 int hdelete(const char *key)
370 {
371 	return hdelete_r(key, &htab);
372 }
373 
374 int hdelete_r(const char *key, struct hsearch_data *htab)
375 {
376 	ENTRY e, *ep;
377 	int idx;
378 
379 	debug("hdelete: DELETE key \"%s\"\n", key);
380 
381 	e.key = (char *)key;
382 
383 	if ((idx = hsearch_r(e, FIND, &ep, htab)) == 0) {
384 		__set_errno(ESRCH);
385 		return 0;	/* not found */
386 	}
387 
388 	/* free used ENTRY */
389 	debug("hdelete: DELETING key \"%s\"\n", key);
390 
391 	free(ep->key);
392 	free(ep->data);
393 	htab->table[idx].used = 0;
394 
395 	--htab->filled;
396 
397 	return 1;
398 }
399 
400 /*
401  * hexport()
402  */
403 
404 /*
405  * Export the data stored in the hash table in linearized form.
406  *
 * Entries are exported as "name=value" strings, separated by an
 * arbitrary separator character. This allows this function to be
 * used both when formatting the U-Boot environment for external
 * storage (using '\0' as separator), and for the "printenv" command
 * to print all variables, simply by using '\n' as separator. This
 * can also be used for new features like exporting the environment
 * data as a text file, including the option for later re-import.
416  * The entries in the result list will be sorted by ascending key
417  * values.
418  *
419  * If the separator character is different from NUL, then any
420  * separator characters and backslash characters in the values will
421  * be escaped by a preceeding backslash in output. This is needed for
422  * example to enable multi-line values, especially when the output
423  * shall later be parsed (for example, for re-import).
424  *
425  * There are several options how the result buffer is handled:
426  *
427  * *resp  size
428  * -----------
429  *  NULL    0	A string of sufficient length will be allocated.
430  *  NULL   >0	A string of the size given will be
431  *		allocated. An error will be returned if the size is
432  *		not sufficient.  Any unused bytes in the string will
433  *		be '\0'-padded.
434  * !NULL    0	The user-supplied buffer will be used. No length
435  *		checking will be performed, i. e. it is assumed that
436  *		the buffer size will always be big enough. DANGEROUS.
437  * !NULL   >0	The user-supplied buffer will be used. An error will
438  *		be returned if the size is not sufficient.  Any unused
439  *		bytes in the string will be '\0'-padded.
440  */
441 
442 ssize_t hexport(const char sep, char **resp, size_t size)
443 {
444 	return hexport_r(&htab, sep, resp, size);
445 }
446 
447 static int cmpkey(const void *p1, const void *p2)
448 {
449 	ENTRY *e1 = *(ENTRY **) p1;
450 	ENTRY *e2 = *(ENTRY **) p2;
451 
452 	return (strcmp(e1->key, e2->key));
453 }
454 
455 ssize_t hexport_r(struct hsearch_data *htab, const char sep,
456 		 char **resp, size_t size)
457 {
458 	ENTRY *list[htab->size];
459 	char *res, *p;
460 	size_t totlen;
461 	int i, n;
462 
463 	/* Test for correct arguments.  */
464 	if ((resp == NULL) || (htab == NULL)) {
465 		__set_errno(EINVAL);
466 		return (-1);
467 	}
468 
469 	debug("EXPORT  table = %p, htab.size = %d, htab.filled = %d, size = %d\n",
470 		htab, htab->size, htab->filled, size);
471 	/*
472 	 * Pass 1:
473 	 * search used entries,
474 	 * save addresses and compute total length
475 	 */
476 	for (i = 1, n = 0, totlen = 0; i <= htab->size; ++i) {
477 
478 		if (htab->table[i].used) {
479 			ENTRY *ep = &htab->table[i].entry;
480 
481 			list[n++] = ep;
482 
483 			totlen += strlen(ep->key) + 2;
484 
485 			if (sep == '\0') {
486 				totlen += strlen(ep->data);
487 			} else {	/* check if escapes are needed */
488 				char *s = ep->data;
489 
490 				while (*s) {
491 					++totlen;
492 					/* add room for needed escape chars */
493 					if ((*s == sep) || (*s == '\\'))
494 						++totlen;
495 					++s;
496 				}
497 			}
498 			totlen += 2;	/* for '=' and 'sep' char */
499 		}
500 	}
501 
502 #ifdef DEBUG
503 	/* Pass 1a: print unsorted list */
504 	printf("Unsorted: n=%d\n", n);
505 	for (i = 0; i < n; ++i) {
506 		printf("\t%3d: %p ==> %-10s => %s\n",
507 		       i, list[i], list[i]->key, list[i]->data);
508 	}
509 #endif
510 
511 	/* Sort list by keys */
512 	qsort(list, n, sizeof(ENTRY *), cmpkey);
513 
514 	/* Check if the user supplied buffer size is sufficient */
515 	if (size) {
516 		if (size < totlen + 1) {	/* provided buffer too small */
517 			debug("### buffer too small: %d, but need %d\n",
518 				size, totlen + 1);
519 			__set_errno(ENOMEM);
520 			return (-1);
521 		}
522 	} else {
523 		size = totlen + 1;
524 	}
525 
526 	/* Check if the user provided a buffer */
527 	if (*resp) {
528 		/* yes; clear it */
529 		res = *resp;
530 		memset(res, '\0', size);
531 	} else {
532 		/* no, allocate and clear one */
533 		*resp = res = calloc(1, size);
534 		if (res == NULL) {
535 			__set_errno(ENOMEM);
536 			return (-1);
537 		}
538 	}
539 	/*
540 	 * Pass 2:
541 	 * export sorted list of result data
542 	 */
543 	for (i = 0, p = res; i < n; ++i) {
544 		char *s;
545 
546 		s = list[i]->key;
547 		while (*s)
548 			*p++ = *s++;
549 		*p++ = '=';
550 
551 		s = list[i]->data;
552 
553 		while (*s) {
554 			if ((*s == sep) || (*s == '\\'))
555 				*p++ = '\\';	/* escape */
556 			*p++ = *s++;
557 		}
558 		*p++ = sep;
559 	}
560 	*p = '\0';		/* terminate result */
561 
562 	return size;
563 }
564 
565 
566 /*
567  * himport()
568  */
569 
570 /*
571  * Import linearized data into hash table.
572  *
573  * This is the inverse function to hexport(): it takes a linear list
574  * of "name=value" pairs and creates hash table entries from it.
575  *
576  * Entries without "value", i. e. consisting of only "name" or
577  * "name=", will cause this entry to be deleted from the hash table.
578  *
579  * The "flag" argument can be used to control the behaviour: when the
580  * H_NOCLEAR bit is set, then an existing hash table will kept, i. e.
581  * new data will be added to an existing hash table; otherwise, old
582  * data will be discarded and a new hash table will be created.
583  *
584  * The separator character for the "name=value" pairs can be selected,
585  * so we both support importing from externally stored environment
586  * data (separated by NUL characters) and from plain text files
587  * (entries separated by newline characters).
588  *
589  * To allow for nicely formatted text input, leading white space
590  * (sequences of SPACE and TAB chars) is ignored, and entries starting
591  * (after removal of any leading white space) with a '#' character are
592  * considered comments and ignored.
593  *
594  * [NOTE: this means that a variable name cannot start with a '#'
595  * character.]
596  *
597  * When using a non-NUL separator character, backslash is used as
598  * escape character in the value part, allowing for example for
599  * multi-line values.
600  *
601  * In theory, arbitrary separator characters can be used, but only
602  * '\0' and '\n' have really been tested.
603  */
604 
605 int himport(const char *env, size_t size, const char sep, int flag)
606 {
607 	return himport_r(&htab, env, size, sep, flag);
608 }
609 
610 int himport_r(struct hsearch_data *htab,
611 	      const char *env, size_t size, const char sep, int flag)
612 {
613 	char *data, *sp, *dp, *name, *value;
614 
615 	/* Test for correct arguments.  */
616 	if (htab == NULL) {
617 		__set_errno(EINVAL);
618 		return 0;
619 	}
620 
621 	/* we allocate new space to make sure we can write to the array */
622 	if ((data = malloc(size)) == NULL) {
623 		debug("himport_r: can't malloc %d bytes\n", size);
624 		__set_errno(ENOMEM);
625 		return 0;
626 	}
627 	memcpy(data, env, size);
628 	dp = data;
629 
630 	if ((flag & H_NOCLEAR) == 0) {
631 		/* Destroy old hash table if one exists */
632 		debug("Destroy Hash Table: %p table = %p\n", htab,
633 		       htab->table);
634 		if (htab->table)
635 			hdestroy_r(htab);
636 	}
637 
638 	/*
639 	 * Create new hash table (if needed).  The computation of the hash
640 	 * table size is based on heuristics: in a sample of some 70+
641 	 * existing systems we found an average size of 39+ bytes per entry
642 	 * in the environment (for the whole key=value pair). Assuming a
643 	 * size of 8 per entry (= safety factor of ~5) should provide enough
644 	 * safety margin for any existing environment definitions and still
645 	 * allow for more than enough dynamic additions. Note that the
646 	 * "size" argument is supposed to give the maximum enviroment size
647 	 * (CONFIG_ENV_SIZE).  This heuristics will result in
648 	 * unreasonably large numbers (and thus memory footprint) for
649 	 * big flash environments (>8,000 entries for 64 KB
650 	 * envrionment size), so we clip it to a reasonable value
651 	 * (which can be overwritten in the board config file if
652 	 * needed).
653 	 */
654 
655 	if (!htab->table) {
656 		int nent = size / 8;
657 
658 		if (nent > CONFIG_ENV_MAX_ENTRIES)
659 			nent = CONFIG_ENV_MAX_ENTRIES;
660 
661 		debug("Create Hash Table: N=%d\n", nent);
662 
663 		if (hcreate_r(nent, htab) == 0) {
664 			free(data);
665 			return 0;
666 		}
667 	}
668 
669 	/* Parse environment; allow for '\0' and 'sep' as separators */
670 	do {
671 		ENTRY e, *rv;
672 
673 		/* skip leading white space */
674 		while ((*dp == ' ') || (*dp == '\t'))
675 			++dp;
676 
677 		/* skip comment lines */
678 		if (*dp == '#') {
679 			while (*dp && (*dp != sep))
680 				++dp;
681 			++dp;
682 			continue;
683 		}
684 
685 		/* parse name */
686 		for (name = dp; *dp != '=' && *dp && *dp != sep; ++dp)
687 			;
688 
689 		/* deal with "name" and "name=" entries (delete var) */
690 		if (*dp == '\0' || *(dp + 1) == '\0' ||
691 		    *dp == sep || *(dp + 1) == sep) {
692 			if (*dp == '=')
693 				*dp++ = '\0';
694 			*dp++ = '\0';	/* terminate name */
695 
696 			debug("DELETE CANDIDATE: \"%s\"\n", name);
697 
698 			if (hdelete_r(name, htab) == 0)
699 				debug("DELETE ERROR ##############################\n");
700 
701 			continue;
702 		}
703 		*dp++ = '\0';	/* terminate name */
704 
705 		/* parse value; deal with escapes */
706 		for (value = sp = dp; *dp && (*dp != sep); ++dp) {
707 			if ((*dp == '\\') && *(dp + 1))
708 				++dp;
709 			*sp++ = *dp;
710 		}
711 		*sp++ = '\0';	/* terminate value */
712 		++dp;
713 
714 		/* enter into hash table */
715 		e.key = name;
716 		e.data = value;
717 
718 		hsearch_r(e, ENTER, &rv, htab);
719 		if (rv == NULL) {
720 			printf("himport_r: can't insert \"%s=%s\" into hash table\n",
721 				name, value);
722 			return 0;
723 		}
724 
725 		debug("INSERT: table %p, filled %d/%d rv %p ==> name=\"%s\" value=\"%s\"\n",
726 			htab, htab->filled, htab->size,
727 			rv, name, value);
728 	} while ((dp < data + size) && *dp);	/* size check needed for text */
729 						/* without '\0' termination */
730 	debug("INSERT: free(data = %p)\n", data);
731 	free(data);
732 
733 	debug("INSERT: done\n");
734 	return 1;		/* everything OK */
735 }
736