xref: /openbmc/u-boot/lib/hashtable.c (revision 5187d8dd)
1 /*
2  * This implementation is based on code from uClibc-0.9.30.3 but was
3  * modified and extended for use within U-Boot.
4  *
5  * Copyright (C) 2010 Wolfgang Denk <wd@denx.de>
6  *
7  * Original license header:
8  *
9  * Copyright (C) 1993, 1995, 1996, 1997, 2002 Free Software Foundation, Inc.
10  * This file is part of the GNU C Library.
11  * Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1993.
12  *
13  * The GNU C Library is free software; you can redistribute it and/or
14  * modify it under the terms of the GNU Lesser General Public
15  * License as published by the Free Software Foundation; either
16  * version 2.1 of the License, or (at your option) any later version.
17  *
18  * The GNU C Library is distributed in the hope that it will be useful,
19  * but WITHOUT ANY WARRANTY; without even the implied warranty of
20  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21  * Lesser General Public License for more details.
22  *
23  * You should have received a copy of the GNU Lesser General Public
24  * License along with the GNU C Library; if not, write to the Free
25  * Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
26  * 02111-1307 USA.
27  */
28 
29 #include <errno.h>
30 #include <malloc.h>
31 
32 #ifdef USE_HOSTCC		/* HOST build */
33 # include <string.h>
34 # include <assert.h>
35 # include <ctype.h>
36 
37 # ifndef debug
38 #  ifdef DEBUG
39 #   define debug(fmt,args...)	printf(fmt ,##args)
40 #  else
41 #   define debug(fmt,args...)
42 #  endif
43 # endif
44 #else				/* U-Boot build */
45 # include <common.h>
46 # include <linux/string.h>
47 # include <linux/ctype.h>
48 #endif
49 
50 #ifndef	CONFIG_ENV_MIN_ENTRIES	/* minimum number of entries */
51 #define	CONFIG_ENV_MIN_ENTRIES 64
52 #endif
53 #ifndef	CONFIG_ENV_MAX_ENTRIES	/* maximum number of entries */
54 #define	CONFIG_ENV_MAX_ENTRIES 512
55 #endif
56 
57 #include "search.h"
58 
59 /*
60  * [Aho,Sethi,Ullman] Compilers: Principles, Techniques and Tools, 1986
61  * [Knuth]	      The Art of Computer Programming, part 3 (6.4)
62  */
63 
64 /*
65  * The reentrant version has no static variables to maintain the state.
66  * Instead the interface of all functions is extended to take an argument
67  * which describes the current status.
68  */
69 typedef struct _ENTRY {
70 	int used;
71 	ENTRY entry;
72 } _ENTRY;
73 
74 
75 /*
76  * hcreate()
77  */
78 
/*
 * For the double hashing method used here the table size has to be a
 * prime, so the user-supplied size is rounded up with this primality
 * test.  Plain trial division is adequate because
 * a)  the code is (most probably) called only a few times per program
 *     run and
 * b)  the number is small because the table must fit into memory.
 *
 * Returns 1 if "number" is prime and 0 otherwise.  The loop bound is
 * written as a division so that it cannot wrap around for large inputs.
 */
static int isprime(unsigned int number)
{
	unsigned int div;

	if (number < 2)
		return 0;	/* 0 and 1 are not prime */
	if (number < 4)
		return 1;	/* 2 and 3 are prime */
	if (number % 2 == 0)
		return 0;

	/* only odd divisors up to sqrt(number) need to be tried */
	for (div = 3; div <= number / div; div += 2) {
		if (number % div == 0)
			return 0;
	}

	return 1;
}
96 
97 /*
98  * Before using the hash table we must allocate memory for it.
99  * Test for an existing table are done. We allocate one element
100  * more as the found prime number says. This is done for more effective
101  * indexing as explained in the comment for the hsearch function.
102  * The contents of the table is zeroed, especially the field used
103  * becomes zero.
104  */
105 
106 int hcreate_r(size_t nel, struct hsearch_data *htab)
107 {
108 	/* Test for correct arguments.  */
109 	if (htab == NULL) {
110 		__set_errno(EINVAL);
111 		return 0;
112 	}
113 
114 	/* There is still another table active. Return with error. */
115 	if (htab->table != NULL)
116 		return 0;
117 
118 	/* Change nel to the first prime number not smaller as nel. */
119 	nel |= 1;		/* make odd */
120 	while (!isprime(nel))
121 		nel += 2;
122 
123 	htab->size = nel;
124 	htab->filled = 0;
125 
126 	/* allocate memory and zero out */
127 	htab->table = (_ENTRY *) calloc(htab->size + 1, sizeof(_ENTRY));
128 	if (htab->table == NULL)
129 		return 0;
130 
131 	/* everything went alright */
132 	return 1;
133 }
134 
135 
136 /*
137  * hdestroy()
138  */
139 
140 /*
141  * After using the hash table it has to be destroyed. The used memory can
142  * be freed and the local static variable can be marked as not used.
143  */
144 
145 void hdestroy_r(struct hsearch_data *htab)
146 {
147 	int i;
148 
149 	/* Test for correct arguments.  */
150 	if (htab == NULL) {
151 		__set_errno(EINVAL);
152 		return;
153 	}
154 
155 	/* free used memory */
156 	for (i = 1; i <= htab->size; ++i) {
157 		if (htab->table[i].used > 0) {
158 			ENTRY *ep = &htab->table[i].entry;
159 
160 			free((void *)ep->key);
161 			free(ep->data);
162 		}
163 	}
164 	free(htab->table);
165 
166 	/* the sign for an existing table is an value != NULL in htable */
167 	htab->table = NULL;
168 }
169 
170 /*
171  * hsearch()
172  */
173 
174 /*
175  * This is the search function. It uses double hashing with open addressing.
176  * The argument item.key has to be a pointer to a zero-terminated object,
177  * most probably a string of chars. The function for generating a number
178  * from the string is simple but fast. It can be replaced by a more complex
179  * function like ajw (see [Aho,Sethi,Ullman]) if the need arises.
180  *
181  * We use a trick to speed up the lookup. The table is created by hcreate
182  * with one more element available. This enables us to treat index zero
183  * specially. This index will never be used because we store the first hash
184  * index in the field "used", where zero means not used. Every positive value
185  * means used (hdelete_r() marks removed entries with -1). The "used" field
186  * serves as a first fast comparison for equality of the stored and the
187  * parameter value. This helps to avoid unnecessary, expensive calls of strcmp.
188  *
189  * This implementation differs from the standard library version of
190  * this function in a number of ways:
191  *
192  * - While the standard version does not make any assumptions about
193  *   the type of the stored data objects at all, this implementation
194  *   works with NUL terminated strings only.
195  * - Instead of storing just pointers to the original objects, we
196  *   create local copies so the caller does not need to care about the
197  *   data any more.
198  * - The standard implementation does not provide a way to update an
199  *   existing entry.  This version will create a new entry or update an
200  *   existing one when both "action == ENTER" and "item.data != NULL".
201  * - Instead of returning 1 on success, we return the index into the
202  *   internal hash table, which is also guaranteed to be positive.
203  *   This allows us direct access to the found hash table slot for
204  *   example for functions like hdelete().
205  */
206 
207 /*
208  * hstrstr_r - return index to entry whose key and/or data contains match
209  */
210 int hstrstr_r(const char *match, int last_idx, ENTRY ** retval,
211 	      struct hsearch_data *htab)
212 {
213 	unsigned int idx;
214 
215 	for (idx = last_idx + 1; idx < htab->size; ++idx) {
216 		if (htab->table[idx].used <= 0)
217 			continue;
218 		if (strstr(htab->table[idx].entry.key, match) ||
219 		    strstr(htab->table[idx].entry.data, match)) {
220 			*retval = &htab->table[idx].entry;
221 			return idx;
222 		}
223 	}
224 
225 	__set_errno(ESRCH);
226 	*retval = NULL;
227 	return 0;
228 }
229 
230 int hmatch_r(const char *match, int last_idx, ENTRY ** retval,
231 	     struct hsearch_data *htab)
232 {
233 	unsigned int idx;
234 	size_t key_len = strlen(match);
235 
236 	for (idx = last_idx + 1; idx < htab->size; ++idx) {
237 		if (htab->table[idx].used <= 0)
238 			continue;
239 		if (!strncmp(match, htab->table[idx].entry.key, key_len)) {
240 			*retval = &htab->table[idx].entry;
241 			return idx;
242 		}
243 	}
244 
245 	__set_errno(ESRCH);
246 	*retval = NULL;
247 	return 0;
248 }
249 
250 int hsearch_r(ENTRY item, ACTION action, ENTRY ** retval,
251 	      struct hsearch_data *htab)
252 {
253 	unsigned int hval;
254 	unsigned int count;
255 	unsigned int len = strlen(item.key);
256 	unsigned int idx;
257 	unsigned int first_deleted = 0;
258 
259 	/* Compute an value for the given string. Perhaps use a better method. */
260 	hval = len;
261 	count = len;
262 	while (count-- > 0) {
263 		hval <<= 4;
264 		hval += item.key[count];
265 	}
266 
267 	/*
268 	 * First hash function:
269 	 * simply take the modul but prevent zero.
270 	 */
271 	hval %= htab->size;
272 	if (hval == 0)
273 		++hval;
274 
275 	/* The first index tried. */
276 	idx = hval;
277 
278 	if (htab->table[idx].used) {
279 		/*
280 		 * Further action might be required according to the
281 		 * action value.
282 		 */
283 		unsigned hval2;
284 
285 		if (htab->table[idx].used == -1
286 		    && !first_deleted)
287 			first_deleted = idx;
288 
289 		if (htab->table[idx].used == hval
290 		    && strcmp(item.key, htab->table[idx].entry.key) == 0) {
291 			/* Overwrite existing value? */
292 			if ((action == ENTER) && (item.data != NULL)) {
293 				free(htab->table[idx].entry.data);
294 				htab->table[idx].entry.data =
295 					strdup(item.data);
296 				if (!htab->table[idx].entry.data) {
297 					__set_errno(ENOMEM);
298 					*retval = NULL;
299 					return 0;
300 				}
301 			}
302 			/* return found entry */
303 			*retval = &htab->table[idx].entry;
304 			return idx;
305 		}
306 
307 		/*
308 		 * Second hash function:
309 		 * as suggested in [Knuth]
310 		 */
311 		hval2 = 1 + hval % (htab->size - 2);
312 
313 		do {
314 			/*
315 			 * Because SIZE is prime this guarantees to
316 			 * step through all available indices.
317 			 */
318 			if (idx <= hval2)
319 				idx = htab->size + idx - hval2;
320 			else
321 				idx -= hval2;
322 
323 			/*
324 			 * If we visited all entries leave the loop
325 			 * unsuccessfully.
326 			 */
327 			if (idx == hval)
328 				break;
329 
330 			/* If entry is found use it. */
331 			if ((htab->table[idx].used == hval)
332 			    && strcmp(item.key, htab->table[idx].entry.key) == 0) {
333 				/* Overwrite existing value? */
334 				if ((action == ENTER) && (item.data != NULL)) {
335 					free(htab->table[idx].entry.data);
336 					htab->table[idx].entry.data =
337 						strdup(item.data);
338 					if (!htab->table[idx].entry.data) {
339 						__set_errno(ENOMEM);
340 						*retval = NULL;
341 						return 0;
342 					}
343 				}
344 				/* return found entry */
345 				*retval = &htab->table[idx].entry;
346 				return idx;
347 			}
348 		}
349 		while (htab->table[idx].used);
350 	}
351 
352 	/* An empty bucket has been found. */
353 	if (action == ENTER) {
354 		/*
355 		 * If table is full and another entry should be
356 		 * entered return with error.
357 		 */
358 		if (htab->filled == htab->size) {
359 			__set_errno(ENOMEM);
360 			*retval = NULL;
361 			return 0;
362 		}
363 
364 		/*
365 		 * Create new entry;
366 		 * create copies of item.key and item.data
367 		 */
368 		if (first_deleted)
369 			idx = first_deleted;
370 
371 		htab->table[idx].used = hval;
372 		htab->table[idx].entry.key = strdup(item.key);
373 		htab->table[idx].entry.data = strdup(item.data);
374 		if (!htab->table[idx].entry.key ||
375 		    !htab->table[idx].entry.data) {
376 			__set_errno(ENOMEM);
377 			*retval = NULL;
378 			return 0;
379 		}
380 
381 		++htab->filled;
382 
383 		/* return new entry */
384 		*retval = &htab->table[idx].entry;
385 		return 1;
386 	}
387 
388 	__set_errno(ESRCH);
389 	*retval = NULL;
390 	return 0;
391 }
392 
393 
394 /*
395  * hdelete()
396  */
397 
398 /*
399  * The standard implementation of hsearch(3) does not provide any way
400  * to delete any entries from the hash table.  We extend the code to
401  * do that.
402  */
403 
404 int hdelete_r(const char *key, struct hsearch_data *htab)
405 {
406 	ENTRY e, *ep;
407 	int idx;
408 
409 	debug("hdelete: DELETE key \"%s\"\n", key);
410 
411 	e.key = (char *)key;
412 
413 	if ((idx = hsearch_r(e, FIND, &ep, htab)) == 0) {
414 		__set_errno(ESRCH);
415 		return 0;	/* not found */
416 	}
417 
418 	/* free used ENTRY */
419 	debug("hdelete: DELETING key \"%s\"\n", key);
420 
421 	free((void *)ep->key);
422 	free(ep->data);
423 	htab->table[idx].used = -1;
424 
425 	--htab->filled;
426 
427 	return 1;
428 }
429 
430 /*
431  * hexport()
432  */
433 
434 /*
435  * Export the data stored in the hash table in linearized form.
436  *
437  * Entries are exported as "name=value" strings, separated by an
438  * arbitrary separator character. This allows this function to be
439  * used both when formatting the U-Boot environment for external
440  * storage (using '\0' as separator) and for the "printenv" command
441  * to print all variables, simply by using '\n' as separator.
442  * This can also be used for new features like
443  * exporting the environment data as text file, including the option
444  * for later re-import.
445  *
446  * The entries in the result list will be sorted by ascending key
447  * values.
448  *
449  * Any separator characters and backslash characters in the values
450  * will be escaped by a preceding backslash in the output (a NUL
451  * separator cannot occur inside a value, so in that case only
452  * backslashes are affected). This is needed for example to enable
453  * multi-line values when the output shall later be re-imported.
454  *
455  * There are several options how the result buffer is handled:
456  *
457  * *resp  size
458  * -----------
459  *  NULL    0	A string of sufficient length will be allocated.
460  *  NULL   >0	A string of the size given will be
461  *		allocated. An error will be returned if the size is
462  *		not sufficient.  Any unused bytes in the string will
463  *		be '\0'-padded.
464  * !NULL    0	The user-supplied buffer will be used. No length
465  *		checking will be performed, i. e. it is assumed that
466  *		the buffer size will always be big enough. DANGEROUS.
467  * !NULL   >0	The user-supplied buffer will be used. An error will
468  *		be returned if the size is not sufficient.  Any unused
469  *		bytes in the string will be '\0'-padded.
470  */
471 
472 static int cmpkey(const void *p1, const void *p2)
473 {
474 	ENTRY *e1 = *(ENTRY **) p1;
475 	ENTRY *e2 = *(ENTRY **) p2;
476 
477 	return (strcmp(e1->key, e2->key));
478 }
479 
480 ssize_t hexport_r(struct hsearch_data *htab, const char sep,
481 		 char **resp, size_t size)
482 {
483 	ENTRY *list[htab->size];
484 	char *res, *p;
485 	size_t totlen;
486 	int i, n;
487 
488 	/* Test for correct arguments.  */
489 	if ((resp == NULL) || (htab == NULL)) {
490 		__set_errno(EINVAL);
491 		return (-1);
492 	}
493 
494 	debug("EXPORT  table = %p, htab.size = %d, htab.filled = %d, size = %d\n",
495 		htab, htab->size, htab->filled, size);
496 	/*
497 	 * Pass 1:
498 	 * search used entries,
499 	 * save addresses and compute total length
500 	 */
501 	for (i = 1, n = 0, totlen = 0; i <= htab->size; ++i) {
502 
503 		if (htab->table[i].used > 0) {
504 			ENTRY *ep = &htab->table[i].entry;
505 
506 			list[n++] = ep;
507 
508 			totlen += strlen(ep->key) + 2;
509 
510 			if (sep == '\0') {
511 				totlen += strlen(ep->data);
512 			} else {	/* check if escapes are needed */
513 				char *s = ep->data;
514 
515 				while (*s) {
516 					++totlen;
517 					/* add room for needed escape chars */
518 					if ((*s == sep) || (*s == '\\'))
519 						++totlen;
520 					++s;
521 				}
522 			}
523 			totlen += 2;	/* for '=' and 'sep' char */
524 		}
525 	}
526 
527 #ifdef DEBUG
528 	/* Pass 1a: print unsorted list */
529 	printf("Unsorted: n=%d\n", n);
530 	for (i = 0; i < n; ++i) {
531 		printf("\t%3d: %p ==> %-10s => %s\n",
532 		       i, list[i], list[i]->key, list[i]->data);
533 	}
534 #endif
535 
536 	/* Sort list by keys */
537 	qsort(list, n, sizeof(ENTRY *), cmpkey);
538 
539 	/* Check if the user supplied buffer size is sufficient */
540 	if (size) {
541 		if (size < totlen + 1) {	/* provided buffer too small */
542 			debug("### buffer too small: %d, but need %d\n",
543 				size, totlen + 1);
544 			__set_errno(ENOMEM);
545 			return (-1);
546 		}
547 	} else {
548 		size = totlen + 1;
549 	}
550 
551 	/* Check if the user provided a buffer */
552 	if (*resp) {
553 		/* yes; clear it */
554 		res = *resp;
555 		memset(res, '\0', size);
556 	} else {
557 		/* no, allocate and clear one */
558 		*resp = res = calloc(1, size);
559 		if (res == NULL) {
560 			__set_errno(ENOMEM);
561 			return (-1);
562 		}
563 	}
564 	/*
565 	 * Pass 2:
566 	 * export sorted list of result data
567 	 */
568 	for (i = 0, p = res; i < n; ++i) {
569 		const char *s;
570 
571 		s = list[i]->key;
572 		while (*s)
573 			*p++ = *s++;
574 		*p++ = '=';
575 
576 		s = list[i]->data;
577 
578 		while (*s) {
579 			if ((*s == sep) || (*s == '\\'))
580 				*p++ = '\\';	/* escape */
581 			*p++ = *s++;
582 		}
583 		*p++ = sep;
584 	}
585 	*p = '\0';		/* terminate result */
586 
587 	return size;
588 }
589 
590 
591 /*
592  * himport()
593  */
594 
595 /*
596  * Import linearized data into hash table.
597  *
598  * This is the inverse function to hexport(): it takes a linear list
599  * of "name=value" pairs and creates hash table entries from it.
600  *
601  * Entries without "value", i. e. consisting of only "name" or
602  * "name=", will cause this entry to be deleted from the hash table.
603  *
604  * The "flag" argument can be used to control the behaviour: when the
605  * H_NOCLEAR bit is set, then an existing hash table will kept, i. e.
606  * new data will be added to an existing hash table; otherwise, old
607  * data will be discarded and a new hash table will be created.
608  *
609  * The separator character for the "name=value" pairs can be selected,
610  * so we both support importing from externally stored environment
611  * data (separated by NUL characters) and from plain text files
612  * (entries separated by newline characters).
613  *
614  * To allow for nicely formatted text input, leading white space
615  * (sequences of SPACE and TAB chars) is ignored, and entries starting
616  * (after removal of any leading white space) with a '#' character are
617  * considered comments and ignored.
618  *
619  * [NOTE: this means that a variable name cannot start with a '#'
620  * character.]
621  *
622  * When using a non-NUL separator character, backslash is used as
623  * escape character in the value part, allowing for example for
624  * multi-line values.
625  *
626  * In theory, arbitrary separator characters can be used, but only
627  * '\0' and '\n' have really been tested.
628  */
629 
630 int himport_r(struct hsearch_data *htab,
631 	      const char *env, size_t size, const char sep, int flag)
632 {
633 	char *data, *sp, *dp, *name, *value;
634 
635 	/* Test for correct arguments.  */
636 	if (htab == NULL) {
637 		__set_errno(EINVAL);
638 		return 0;
639 	}
640 
641 	/* we allocate new space to make sure we can write to the array */
642 	if ((data = malloc(size)) == NULL) {
643 		debug("himport_r: can't malloc %d bytes\n", size);
644 		__set_errno(ENOMEM);
645 		return 0;
646 	}
647 	memcpy(data, env, size);
648 	dp = data;
649 
650 	if ((flag & H_NOCLEAR) == 0) {
651 		/* Destroy old hash table if one exists */
652 		debug("Destroy Hash Table: %p table = %p\n", htab,
653 		       htab->table);
654 		if (htab->table)
655 			hdestroy_r(htab);
656 	}
657 
658 	/*
659 	 * Create new hash table (if needed).  The computation of the hash
660 	 * table size is based on heuristics: in a sample of some 70+
661 	 * existing systems we found an average size of 39+ bytes per entry
662 	 * in the environment (for the whole key=value pair). Assuming a
663 	 * size of 8 per entry (= safety factor of ~5) should provide enough
664 	 * safety margin for any existing environment definitions and still
665 	 * allow for more than enough dynamic additions. Note that the
666 	 * "size" argument is supposed to give the maximum enviroment size
667 	 * (CONFIG_ENV_SIZE).  This heuristics will result in
668 	 * unreasonably large numbers (and thus memory footprint) for
669 	 * big flash environments (>8,000 entries for 64 KB
670 	 * envrionment size), so we clip it to a reasonable value.
671 	 * On the other hand we need to add some more entries for free
672 	 * space when importing very small buffers. Both boundaries can
673 	 * be overwritten in the board config file if needed.
674 	 */
675 
676 	if (!htab->table) {
677 		int nent = CONFIG_ENV_MIN_ENTRIES + size / 8;
678 
679 		if (nent > CONFIG_ENV_MAX_ENTRIES)
680 			nent = CONFIG_ENV_MAX_ENTRIES;
681 
682 		debug("Create Hash Table: N=%d\n", nent);
683 
684 		if (hcreate_r(nent, htab) == 0) {
685 			free(data);
686 			return 0;
687 		}
688 	}
689 
690 	/* Parse environment; allow for '\0' and 'sep' as separators */
691 	do {
692 		ENTRY e, *rv;
693 
694 		/* skip leading white space */
695 		while (isblank(*dp))
696 			++dp;
697 
698 		/* skip comment lines */
699 		if (*dp == '#') {
700 			while (*dp && (*dp != sep))
701 				++dp;
702 			++dp;
703 			continue;
704 		}
705 
706 		/* parse name */
707 		for (name = dp; *dp != '=' && *dp && *dp != sep; ++dp)
708 			;
709 
710 		/* deal with "name" and "name=" entries (delete var) */
711 		if (*dp == '\0' || *(dp + 1) == '\0' ||
712 		    *dp == sep || *(dp + 1) == sep) {
713 			if (*dp == '=')
714 				*dp++ = '\0';
715 			*dp++ = '\0';	/* terminate name */
716 
717 			debug("DELETE CANDIDATE: \"%s\"\n", name);
718 
719 			if (hdelete_r(name, htab) == 0)
720 				debug("DELETE ERROR ##############################\n");
721 
722 			continue;
723 		}
724 		*dp++ = '\0';	/* terminate name */
725 
726 		/* parse value; deal with escapes */
727 		for (value = sp = dp; *dp && (*dp != sep); ++dp) {
728 			if ((*dp == '\\') && *(dp + 1))
729 				++dp;
730 			*sp++ = *dp;
731 		}
732 		*sp++ = '\0';	/* terminate value */
733 		++dp;
734 
735 		/* enter into hash table */
736 		e.key = name;
737 		e.data = value;
738 
739 		hsearch_r(e, ENTER, &rv, htab);
740 		if (rv == NULL) {
741 			printf("himport_r: can't insert \"%s=%s\" into hash table\n",
742 				name, value);
743 			return 0;
744 		}
745 
746 		debug("INSERT: table %p, filled %d/%d rv %p ==> name=\"%s\" value=\"%s\"\n",
747 			htab, htab->filled, htab->size,
748 			rv, name, value);
749 	} while ((dp < data + size) && *dp);	/* size check needed for text */
750 						/* without '\0' termination */
751 	debug("INSERT: free(data = %p)\n", data);
752 	free(data);
753 
754 	debug("INSERT: done\n");
755 	return 1;		/* everything OK */
756 }
757