xref: /openbmc/u-boot/lib/hashtable.c (revision 4e3349b6)
1 /*
2  * This implementation is based on code from uClibc-0.9.30.3 but was
3  * modified and extended for use within U-Boot.
4  *
5  * Copyright (C) 2010 Wolfgang Denk <wd@denx.de>
6  *
7  * Original license header:
8  *
9  * Copyright (C) 1993, 1995, 1996, 1997, 2002 Free Software Foundation, Inc.
10  * This file is part of the GNU C Library.
11  * Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1993.
12  *
13  * The GNU C Library is free software; you can redistribute it and/or
14  * modify it under the terms of the GNU Lesser General Public
15  * License as published by the Free Software Foundation; either
16  * version 2.1 of the License, or (at your option) any later version.
17  *
18  * The GNU C Library is distributed in the hope that it will be useful,
19  * but WITHOUT ANY WARRANTY; without even the implied warranty of
20  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21  * Lesser General Public License for more details.
22  *
23  * You should have received a copy of the GNU Lesser General Public
24  * License along with the GNU C Library; if not, write to the Free
25  * Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
26  * 02111-1307 USA.
27  */
28 
29 #include <errno.h>
30 #include <malloc.h>
31 
32 #ifdef USE_HOSTCC		/* HOST build */
33 # include <string.h>
34 # include <assert.h>
35 # include <ctype.h>
36 
37 # ifndef debug
38 #  ifdef DEBUG
39 #   define debug(fmt,args...)	printf(fmt ,##args)
40 #  else
41 #   define debug(fmt,args...)
42 #  endif
43 # endif
44 #else				/* U-Boot build */
45 # include <common.h>
46 # include <linux/string.h>
47 # include <linux/ctype.h>
48 #endif
49 
50 #ifndef	CONFIG_ENV_MIN_ENTRIES	/* minimum number of entries */
51 #define	CONFIG_ENV_MIN_ENTRIES 64
52 #endif
53 #ifndef	CONFIG_ENV_MAX_ENTRIES	/* maximum number of entries */
54 #define	CONFIG_ENV_MAX_ENTRIES 512
55 #endif
56 
57 #include "search.h"
58 
59 /*
60  * [Aho,Sethi,Ullman] Compilers: Principles, Techniques and Tools, 1986
61  * [Knuth]	      The Art of Computer Programming, part 3 (6.4)
62  */
63 
64 /*
65  * The reentrant version has no static variables to maintain the state.
66  * Instead the interface of all functions is extended to take an argument
67  * which describes the current status.
68  */
/*
 * One slot of the hash table: the stored key/data pair together with
 * the bookkeeping value that tells whether the slot is occupied.
 */
typedef struct _ENTRY {
	/*
	 * 0 while the slot has never been used (the table is zeroed on
	 * creation), -1 once hdelete_r() has removed the entry, otherwise
	 * the first hash value of the stored key as set by hsearch_r().
	 */
	int used;
	/* the key/data pair held in this slot */
	ENTRY entry;
} _ENTRY;
73 
74 
75 /*
76  * hcreate()
77  */
78 
79 /*
80  * For the used double hash method the table size has to be a prime. To
81  * correct the user given table size we need a prime test.  This trivial
82  * algorithm is adequate because
83  * a)  the code is (most probably) called a few times per program run and
84  * b)  the number is small because the table must fit in the core
85  * */
/*
 * isprime() - trial-division primality test
 * @number:	value to test
 *
 * The table size computation only ever passes odd numbers, but even
 * values and values below 2 are classified correctly as well so that the
 * caller never ends up with a degenerate table size.
 *
 * Return: 1 if @number is prime, 0 otherwise.
 */
static int isprime(unsigned int number)
{
	unsigned int div;

	if (number < 2)
		return 0;
	if (number % 2 == 0)
		return number == 2;

	/*
	 * Only odd divisors up to sqrt(number) have to be tried.  The bound
	 * is written as "div <= number / div" because "div * div" wraps
	 * around for large numbers, which would keep the loop running long
	 * after the square root has been passed.
	 */
	for (div = 3; div <= number / div; div += 2) {
		if (number % div == 0)
			return 0;
	}

	return 1;
}
96 
97 /*
98  * Before using the hash table we must allocate memory for it.
99  * A check for an already existing table is done first. We allocate
100  * one element more than the prime number found. This is done for more
101  * effective indexing as explained in the comment for the hsearch function.
102  * The contents of the table are zeroed; in particular the field "used"
103  * becomes zero.
104  */
105 
106 int hcreate_r(size_t nel, struct hsearch_data *htab)
107 {
108 	/* Test for correct arguments.  */
109 	if (htab == NULL) {
110 		__set_errno(EINVAL);
111 		return 0;
112 	}
113 
114 	/* There is still another table active. Return with error. */
115 	if (htab->table != NULL)
116 		return 0;
117 
118 	/* Change nel to the first prime number not smaller as nel. */
119 	nel |= 1;		/* make odd */
120 	while (!isprime(nel))
121 		nel += 2;
122 
123 	htab->size = nel;
124 	htab->filled = 0;
125 
126 	/* allocate memory and zero out */
127 	htab->table = (_ENTRY *) calloc(htab->size + 1, sizeof(_ENTRY));
128 	if (htab->table == NULL)
129 		return 0;
130 
131 	/* everything went alright */
132 	return 1;
133 }
134 
135 
136 /*
137  * hdestroy()
138  */
139 
140 /*
141  * After using the hash table it has to be destroyed. The used memory can
142  * be freed and the local static variable can be marked as not used.
143  */
144 
145 void hdestroy_r(struct hsearch_data *htab)
146 {
147 	int i;
148 
149 	/* Test for correct arguments.  */
150 	if (htab == NULL) {
151 		__set_errno(EINVAL);
152 		return;
153 	}
154 
155 	/* free used memory */
156 	for (i = 1; i <= htab->size; ++i) {
157 		if (htab->table[i].used > 0) {
158 			ENTRY *ep = &htab->table[i].entry;
159 
160 			free((void *)ep->key);
161 			free(ep->data);
162 		}
163 	}
164 	free(htab->table);
165 
166 	/* the sign for an existing table is an value != NULL in htable */
167 	htab->table = NULL;
168 }
169 
170 /*
171  * hsearch()
172  */
173 
174 /*
175  * This is the search function. It uses double hashing with open addressing.
176  * The argument item.key has to be a pointer to a zero-terminated string
177  * of chars. The function for generating a number from the string is
178  * simple but fast. It can be replaced by a more complex function
179  * like ajw (see [Aho,Sethi,Ullman]) if the need arises.
180  *
181  * We use a trick to speed up the lookup. The table is created by hcreate
182  * with one more element available. This enables us to use the index zero
183  * special. This index will never be used because we store the first hash
184  * index in the field used where zero means not used. Every other value
185  * means used. The used field can be used as a first fast comparison for
186  * equality of the stored and the parameter value. This helps to prevent
187  * unnecessary expensive calls of strcmp.
188  *
189  * This implementation differs from the standard library version of
190  * this function in a number of ways:
191  *
192  * - While the standard version does not make any assumptions about
193  *   the type of the stored data objects at all, this implementation
194  *   works with NUL terminated strings only.
195  * - Instead of storing just pointers to the original objects, we
196  *   create local copies so the caller does not need to care about the
197  *   data any more.
198  * - The standard implementation does not provide a way to update an
199  *   existing entry.  This version will create a new entry or update an
200  *   existing one when both "action == ENTER" and "item.data != NULL".
201  * - Instead of returning 1 on success, we return the index into the
202  *   internal hash table, which is also guaranteed to be positive.
203  *   This allows us direct access to the found hash table slot for
204  *   example for functions like hdelete().
205  */
206 
207 /*
208  * hstrstr_r - return index to entry whose key and/or data contains match
209  */
210 int hstrstr_r(const char *match, int last_idx, ENTRY ** retval,
211 	      struct hsearch_data *htab)
212 {
213 	unsigned int idx;
214 
215 	for (idx = last_idx + 1; idx < htab->size; ++idx) {
216 		if (htab->table[idx].used <= 0)
217 			continue;
218 		if (strstr(htab->table[idx].entry.key, match) ||
219 		    strstr(htab->table[idx].entry.data, match)) {
220 			*retval = &htab->table[idx].entry;
221 			return idx;
222 		}
223 	}
224 
225 	__set_errno(ESRCH);
226 	*retval = NULL;
227 	return 0;
228 }
229 
230 int hmatch_r(const char *match, int last_idx, ENTRY ** retval,
231 	     struct hsearch_data *htab)
232 {
233 	unsigned int idx;
234 	size_t key_len = strlen(match);
235 
236 	for (idx = last_idx + 1; idx < htab->size; ++idx) {
237 		if (htab->table[idx].used <= 0)
238 			continue;
239 		if (!strncmp(match, htab->table[idx].entry.key, key_len)) {
240 			*retval = &htab->table[idx].entry;
241 			return idx;
242 		}
243 	}
244 
245 	__set_errno(ESRCH);
246 	*retval = NULL;
247 	return 0;
248 }
249 
250 int hsearch_r(ENTRY item, ACTION action, ENTRY ** retval,
251 	      struct hsearch_data *htab)
252 {
253 	unsigned int hval;
254 	unsigned int count;
255 	unsigned int len = strlen(item.key);
256 	unsigned int idx;
257 	unsigned int first_deleted = 0;
258 
259 	/* Compute an value for the given string. Perhaps use a better method. */
260 	hval = len;
261 	count = len;
262 	while (count-- > 0) {
263 		hval <<= 4;
264 		hval += item.key[count];
265 	}
266 
267 	/*
268 	 * First hash function:
269 	 * simply take the modul but prevent zero.
270 	 */
271 	hval %= htab->size;
272 	if (hval == 0)
273 		++hval;
274 
275 	/* The first index tried. */
276 	idx = hval;
277 
278 	if (htab->table[idx].used) {
279 		/*
280 		 * Further action might be required according to the
281 		 * action value.
282 		 */
283 		unsigned hval2;
284 
285 		if (htab->table[idx].used == -1
286 		    && !first_deleted)
287 			first_deleted = idx;
288 
289 		if (htab->table[idx].used == hval
290 		    && strcmp(item.key, htab->table[idx].entry.key) == 0) {
291 			/* Overwrite existing value? */
292 			if ((action == ENTER) && (item.data != NULL)) {
293 				free(htab->table[idx].entry.data);
294 				htab->table[idx].entry.data =
295 					strdup(item.data);
296 				if (!htab->table[idx].entry.data) {
297 					__set_errno(ENOMEM);
298 					*retval = NULL;
299 					return 0;
300 				}
301 			}
302 			/* return found entry */
303 			*retval = &htab->table[idx].entry;
304 			return idx;
305 		}
306 
307 		/*
308 		 * Second hash function:
309 		 * as suggested in [Knuth]
310 		 */
311 		hval2 = 1 + hval % (htab->size - 2);
312 
313 		do {
314 			/*
315 			 * Because SIZE is prime this guarantees to
316 			 * step through all available indices.
317 			 */
318 			if (idx <= hval2)
319 				idx = htab->size + idx - hval2;
320 			else
321 				idx -= hval2;
322 
323 			/*
324 			 * If we visited all entries leave the loop
325 			 * unsuccessfully.
326 			 */
327 			if (idx == hval)
328 				break;
329 
330 			/* If entry is found use it. */
331 			if ((htab->table[idx].used == hval)
332 			    && strcmp(item.key, htab->table[idx].entry.key) == 0) {
333 				/* Overwrite existing value? */
334 				if ((action == ENTER) && (item.data != NULL)) {
335 					free(htab->table[idx].entry.data);
336 					htab->table[idx].entry.data =
337 						strdup(item.data);
338 					if (!htab->table[idx].entry.data) {
339 						__set_errno(ENOMEM);
340 						*retval = NULL;
341 						return 0;
342 					}
343 				}
344 				/* return found entry */
345 				*retval = &htab->table[idx].entry;
346 				return idx;
347 			}
348 		}
349 		while (htab->table[idx].used);
350 	}
351 
352 	/* An empty bucket has been found. */
353 	if (action == ENTER) {
354 		/*
355 		 * If table is full and another entry should be
356 		 * entered return with error.
357 		 */
358 		if (htab->filled == htab->size) {
359 			__set_errno(ENOMEM);
360 			*retval = NULL;
361 			return 0;
362 		}
363 
364 		/*
365 		 * Create new entry;
366 		 * create copies of item.key and item.data
367 		 */
368 		if (first_deleted)
369 			idx = first_deleted;
370 
371 		htab->table[idx].used = hval;
372 		htab->table[idx].entry.key = strdup(item.key);
373 		htab->table[idx].entry.data = strdup(item.data);
374 		if (!htab->table[idx].entry.key ||
375 		    !htab->table[idx].entry.data) {
376 			__set_errno(ENOMEM);
377 			*retval = NULL;
378 			return 0;
379 		}
380 
381 		++htab->filled;
382 
383 		/* return new entry */
384 		*retval = &htab->table[idx].entry;
385 		return 1;
386 	}
387 
388 	__set_errno(ESRCH);
389 	*retval = NULL;
390 	return 0;
391 }
392 
393 
394 /*
395  * hdelete()
396  */
397 
398 /*
399  * The standard implementation of hsearch(3) does not provide any way
400  * to delete any entries from the hash table.  We extend the code to
401  * do that.
402  */
403 
404 int hdelete_r(const char *key, struct hsearch_data *htab)
405 {
406 	ENTRY e, *ep;
407 	int idx;
408 
409 	debug("hdelete: DELETE key \"%s\"\n", key);
410 
411 	e.key = (char *)key;
412 
413 	if ((idx = hsearch_r(e, FIND, &ep, htab)) == 0) {
414 		__set_errno(ESRCH);
415 		return 0;	/* not found */
416 	}
417 
418 	/* free used ENTRY */
419 	debug("hdelete: DELETING key \"%s\"\n", key);
420 
421 	free((void *)ep->key);
422 	free(ep->data);
423 	htab->table[idx].used = -1;
424 
425 	--htab->filled;
426 
427 	return 1;
428 }
429 
430 /*
431  * hexport()
432  */
433 
434 /*
435  * Export the data stored in the hash table in linearized form.
436  *
437  * Entries are exported as "name=value" strings, separated by an
438  * arbitrary (non-NUL, of course) separator character. This allows to
439  * use this function both when formatting the U-Boot environment for
440  * external storage (using '\0' as separator), but also when using it
441  * for the "printenv" command to print all variables, simply by using
442  * '\n' as separator. This can also be used for new features like
443  * exporting the environment data as text file, including the option
444  * for later re-import.
445  *
446  * The entries in the result list will be sorted by ascending key
447  * values.
448  *
449  * Backslash characters in the values are always escaped by a
450  * preceding backslash in the output; if the separator character is
451  * different from NUL, separator characters are escaped as well. This
452  * is needed for example to enable multi-line values, especially when
453  * the output shall later be parsed (for example, for re-import).
454  *
455  * There are several options how the result buffer is handled:
456  *
457  * *resp  size
458  * -----------
459  *  NULL    0	A string of sufficient length will be allocated.
460  *  NULL   >0	A string of the size given will be
461  *		allocated. An error will be returned if the size is
462  *		not sufficient.  Any unused bytes in the string will
463  *		be '\0'-padded.
464  * !NULL    0	The user-supplied buffer will be used. No length
465  *		checking will be performed, i. e. it is assumed that
466  *		the buffer size will always be big enough. DANGEROUS.
467  * !NULL   >0	The user-supplied buffer will be used. An error will
468  *		be returned if the size is not sufficient.  Any unused
469  *		bytes in the string will be '\0'-padded.
470  */
471 
472 static int cmpkey(const void *p1, const void *p2)
473 {
474 	ENTRY *e1 = *(ENTRY **) p1;
475 	ENTRY *e2 = *(ENTRY **) p2;
476 
477 	return (strcmp(e1->key, e2->key));
478 }
479 
480 ssize_t hexport_r(struct hsearch_data *htab, const char sep,
481 		 char **resp, size_t size,
482 		 int argc, char * const argv[])
483 {
484 	ENTRY *list[htab->size];
485 	char *res, *p;
486 	size_t totlen;
487 	int i, n;
488 
489 	/* Test for correct arguments.  */
490 	if ((resp == NULL) || (htab == NULL)) {
491 		__set_errno(EINVAL);
492 		return (-1);
493 	}
494 
495 	debug("EXPORT  table = %p, htab.size = %d, htab.filled = %d, "
496 		"size = %zu\n", htab, htab->size, htab->filled, size);
497 	/*
498 	 * Pass 1:
499 	 * search used entries,
500 	 * save addresses and compute total length
501 	 */
502 	for (i = 1, n = 0, totlen = 0; i <= htab->size; ++i) {
503 
504 		if (htab->table[i].used > 0) {
505 			ENTRY *ep = &htab->table[i].entry;
506 			int arg, found = 0;
507 
508 			for (arg = 0; arg < argc; ++arg) {
509 				if (strcmp(argv[arg], ep->key) == 0) {
510 					found = 1;
511 					break;
512 				}
513 			}
514 			if ((argc > 0) && (found == 0))
515 				continue;
516 
517 			list[n++] = ep;
518 
519 			totlen += strlen(ep->key) + 2;
520 
521 			if (sep == '\0') {
522 				totlen += strlen(ep->data);
523 			} else {	/* check if escapes are needed */
524 				char *s = ep->data;
525 
526 				while (*s) {
527 					++totlen;
528 					/* add room for needed escape chars */
529 					if ((*s == sep) || (*s == '\\'))
530 						++totlen;
531 					++s;
532 				}
533 			}
534 			totlen += 2;	/* for '=' and 'sep' char */
535 		}
536 	}
537 
538 #ifdef DEBUG
539 	/* Pass 1a: print unsorted list */
540 	printf("Unsorted: n=%d\n", n);
541 	for (i = 0; i < n; ++i) {
542 		printf("\t%3d: %p ==> %-10s => %s\n",
543 		       i, list[i], list[i]->key, list[i]->data);
544 	}
545 #endif
546 
547 	/* Sort list by keys */
548 	qsort(list, n, sizeof(ENTRY *), cmpkey);
549 
550 	/* Check if the user supplied buffer size is sufficient */
551 	if (size) {
552 		if (size < totlen + 1) {	/* provided buffer too small */
553 			printf("Env export buffer too small: %zu, "
554 				"but need %zu\n", size, totlen + 1);
555 			__set_errno(ENOMEM);
556 			return (-1);
557 		}
558 	} else {
559 		size = totlen + 1;
560 	}
561 
562 	/* Check if the user provided a buffer */
563 	if (*resp) {
564 		/* yes; clear it */
565 		res = *resp;
566 		memset(res, '\0', size);
567 	} else {
568 		/* no, allocate and clear one */
569 		*resp = res = calloc(1, size);
570 		if (res == NULL) {
571 			__set_errno(ENOMEM);
572 			return (-1);
573 		}
574 	}
575 	/*
576 	 * Pass 2:
577 	 * export sorted list of result data
578 	 */
579 	for (i = 0, p = res; i < n; ++i) {
580 		const char *s;
581 
582 		s = list[i]->key;
583 		while (*s)
584 			*p++ = *s++;
585 		*p++ = '=';
586 
587 		s = list[i]->data;
588 
589 		while (*s) {
590 			if ((*s == sep) || (*s == '\\'))
591 				*p++ = '\\';	/* escape */
592 			*p++ = *s++;
593 		}
594 		*p++ = sep;
595 	}
596 	*p = '\0';		/* terminate result */
597 
598 	return size;
599 }
600 
601 
602 /*
603  * himport()
604  */
605 
606 /*
607  * Import linearized data into hash table.
608  *
609  * This is the inverse function to hexport(): it takes a linear list
610  * of "name=value" pairs and creates hash table entries from it.
611  *
612  * Entries without "value", i. e. consisting of only "name" or
613  * "name=", will cause this entry to be deleted from the hash table.
614  *
615  * The "flag" argument can be used to control the behaviour: when the
616  * H_NOCLEAR bit is set, then an existing hash table will kept, i. e.
617  * new data will be added to an existing hash table; otherwise, old
618  * data will be discarded and a new hash table will be created.
619  *
620  * The separator character for the "name=value" pairs can be selected,
621  * so we both support importing from externally stored environment
622  * data (separated by NUL characters) and from plain text files
623  * (entries separated by newline characters).
624  *
625  * To allow for nicely formatted text input, leading white space
626  * (sequences of SPACE and TAB chars) is ignored, and entries starting
627  * (after removal of any leading white space) with a '#' character are
628  * considered comments and ignored.
629  *
630  * [NOTE: this means that a variable name cannot start with a '#'
631  * character.]
632  *
633  * When using a non-NUL separator character, backslash is used as
634  * escape character in the value part, allowing for example for
635  * multi-line values.
636  *
637  * In theory, arbitrary separator characters can be used, but only
638  * '\0' and '\n' have really been tested.
639  */
640 
641 int himport_r(struct hsearch_data *htab,
642 	      const char *env, size_t size, const char sep, int flag)
643 {
644 	char *data, *sp, *dp, *name, *value;
645 
646 	/* Test for correct arguments.  */
647 	if (htab == NULL) {
648 		__set_errno(EINVAL);
649 		return 0;
650 	}
651 
652 	/* we allocate new space to make sure we can write to the array */
653 	if ((data = malloc(size)) == NULL) {
654 		debug("himport_r: can't malloc %zu bytes\n", size);
655 		__set_errno(ENOMEM);
656 		return 0;
657 	}
658 	memcpy(data, env, size);
659 	dp = data;
660 
661 	if ((flag & H_NOCLEAR) == 0) {
662 		/* Destroy old hash table if one exists */
663 		debug("Destroy Hash Table: %p table = %p\n", htab,
664 		       htab->table);
665 		if (htab->table)
666 			hdestroy_r(htab);
667 	}
668 
669 	/*
670 	 * Create new hash table (if needed).  The computation of the hash
671 	 * table size is based on heuristics: in a sample of some 70+
672 	 * existing systems we found an average size of 39+ bytes per entry
673 	 * in the environment (for the whole key=value pair). Assuming a
674 	 * size of 8 per entry (= safety factor of ~5) should provide enough
675 	 * safety margin for any existing environment definitions and still
676 	 * allow for more than enough dynamic additions. Note that the
677 	 * "size" argument is supposed to give the maximum enviroment size
678 	 * (CONFIG_ENV_SIZE).  This heuristics will result in
679 	 * unreasonably large numbers (and thus memory footprint) for
680 	 * big flash environments (>8,000 entries for 64 KB
681 	 * envrionment size), so we clip it to a reasonable value.
682 	 * On the other hand we need to add some more entries for free
683 	 * space when importing very small buffers. Both boundaries can
684 	 * be overwritten in the board config file if needed.
685 	 */
686 
687 	if (!htab->table) {
688 		int nent = CONFIG_ENV_MIN_ENTRIES + size / 8;
689 
690 		if (nent > CONFIG_ENV_MAX_ENTRIES)
691 			nent = CONFIG_ENV_MAX_ENTRIES;
692 
693 		debug("Create Hash Table: N=%d\n", nent);
694 
695 		if (hcreate_r(nent, htab) == 0) {
696 			free(data);
697 			return 0;
698 		}
699 	}
700 
701 	/* Parse environment; allow for '\0' and 'sep' as separators */
702 	do {
703 		ENTRY e, *rv;
704 
705 		/* skip leading white space */
706 		while (isblank(*dp))
707 			++dp;
708 
709 		/* skip comment lines */
710 		if (*dp == '#') {
711 			while (*dp && (*dp != sep))
712 				++dp;
713 			++dp;
714 			continue;
715 		}
716 
717 		/* parse name */
718 		for (name = dp; *dp != '=' && *dp && *dp != sep; ++dp)
719 			;
720 
721 		/* deal with "name" and "name=" entries (delete var) */
722 		if (*dp == '\0' || *(dp + 1) == '\0' ||
723 		    *dp == sep || *(dp + 1) == sep) {
724 			if (*dp == '=')
725 				*dp++ = '\0';
726 			*dp++ = '\0';	/* terminate name */
727 
728 			debug("DELETE CANDIDATE: \"%s\"\n", name);
729 
730 			if (hdelete_r(name, htab) == 0)
731 				debug("DELETE ERROR ##############################\n");
732 
733 			continue;
734 		}
735 		*dp++ = '\0';	/* terminate name */
736 
737 		/* parse value; deal with escapes */
738 		for (value = sp = dp; *dp && (*dp != sep); ++dp) {
739 			if ((*dp == '\\') && *(dp + 1))
740 				++dp;
741 			*sp++ = *dp;
742 		}
743 		*sp++ = '\0';	/* terminate value */
744 		++dp;
745 
746 		/* enter into hash table */
747 		e.key = name;
748 		e.data = value;
749 
750 		hsearch_r(e, ENTER, &rv, htab);
751 		if (rv == NULL) {
752 			printf("himport_r: can't insert \"%s=%s\" into hash table\n",
753 				name, value);
754 			return 0;
755 		}
756 
757 		debug("INSERT: table %p, filled %d/%d rv %p ==> name=\"%s\" value=\"%s\"\n",
758 			htab, htab->filled, htab->size,
759 			rv, name, value);
760 	} while ((dp < data + size) && *dp);	/* size check needed for text */
761 						/* without '\0' termination */
762 	debug("INSERT: free(data = %p)\n", data);
763 	free(data);
764 
765 	debug("INSERT: done\n");
766 	return 1;		/* everything OK */
767 }
768