xref: /openbmc/u-boot/lib/hashtable.c (revision 1a3cb4ad)
1  /*
2   * This implementation is based on code from uClibc-0.9.30.3 but was
3   * modified and extended for use within U-Boot.
4   *
5   * Copyright (C) 2010 Wolfgang Denk <wd@denx.de>
6   *
7   * Original license header:
8   *
9   * Copyright (C) 1993, 1995, 1996, 1997, 2002 Free Software Foundation, Inc.
10   * This file is part of the GNU C Library.
11   * Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1993.
12   *
13   * The GNU C Library is free software; you can redistribute it and/or
14   * modify it under the terms of the GNU Lesser General Public
15   * License as published by the Free Software Foundation; either
16   * version 2.1 of the License, or (at your option) any later version.
17   *
18   * The GNU C Library is distributed in the hope that it will be useful,
19   * but WITHOUT ANY WARRANTY; without even the implied warranty of
20   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21   * Lesser General Public License for more details.
22   *
23   * You should have received a copy of the GNU Lesser General Public
24   * License along with the GNU C Library; if not, write to the Free
25   * Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
26   * 02111-1307 USA.
27   */
28  
29  #include <errno.h>
30  #include <malloc.h>
31  
32  #ifdef USE_HOSTCC		/* HOST build */
33  # include <string.h>
34  # include <assert.h>
35  # include <ctype.h>
36  
37  # ifndef debug
38  #  ifdef DEBUG
39  #   define debug(fmt,args...)	printf(fmt ,##args)
40  #  else
41  #   define debug(fmt,args...)
42  #  endif
43  # endif
44  #else				/* U-Boot build */
45  # include <common.h>
46  # include <linux/string.h>
47  # include <linux/ctype.h>
48  #endif
49  
50  #ifndef	CONFIG_ENV_MIN_ENTRIES	/* minimum number of entries */
51  #define	CONFIG_ENV_MIN_ENTRIES 64
52  #endif
53  #ifndef	CONFIG_ENV_MAX_ENTRIES	/* maximum number of entries */
54  #define	CONFIG_ENV_MAX_ENTRIES 512
55  #endif
56  
57  #include "search.h"
58  
59  /*
60   * [Aho,Sethi,Ullman] Compilers: Principles, Techniques and Tools, 1986
61   * [Knuth]	      The Art of Computer Programming, part 3 (6.4)
62   */
63  
64  /*
65   * The reentrant version has no static variables to maintain the state.
66   * Instead the interface of all functions is extended to take an argument
67   * which describes the current status.
68   */
69  typedef struct _ENTRY {
70  	int used;
71  	ENTRY entry;
72  } _ENTRY;
73  
74  
75  /*
76   * hcreate()
77   */
78  
/*
 * The double hashing method used here requires the table size to be a
 * prime. To correct the table size given by the user we need a prime
 * test.  This trivial algorithm is adequate because
 * a)  the code is (most probably) called only a few times per program
 *     run and
 * b)  the number is small because the table must fit in core memory.
 */
/*
 * isprime - trial-division primality test for odd numbers
 *
 * @number: value to test; callers only pass odd numbers, so even
 *          divisors are never tried
 *
 * The loop bound is written as "div <= number / div" rather than
 * "div * div <= number" so that it cannot wrap around for values close
 * to the maximum of unsigned int.
 *
 * Returns 1 if no odd divisor d with 3 <= d and d * d <= number divides
 * number, 0 otherwise.
 */
static int isprime(unsigned int number)
{
	unsigned int div;

	for (div = 3; div <= number / div; div += 2) {
		if (number % div == 0)
			return 0;
	}

	return 1;
}
96  
97  /*
98   * Before using the hash table we must allocate memory for it.
99   * Test for an existing table are done. We allocate one element
100   * more as the found prime number says. This is done for more effective
101   * indexing as explained in the comment for the hsearch function.
102   * The contents of the table is zeroed, especially the field used
103   * becomes zero.
104   */
105  
106  int hcreate_r(size_t nel, struct hsearch_data *htab)
107  {
108  	/* Test for correct arguments.  */
109  	if (htab == NULL) {
110  		__set_errno(EINVAL);
111  		return 0;
112  	}
113  
114  	/* There is still another table active. Return with error. */
115  	if (htab->table != NULL)
116  		return 0;
117  
118  	/* Change nel to the first prime number not smaller as nel. */
119  	nel |= 1;		/* make odd */
120  	while (!isprime(nel))
121  		nel += 2;
122  
123  	htab->size = nel;
124  	htab->filled = 0;
125  
126  	/* allocate memory and zero out */
127  	htab->table = (_ENTRY *) calloc(htab->size + 1, sizeof(_ENTRY));
128  	if (htab->table == NULL)
129  		return 0;
130  
131  	/* everything went alright */
132  	return 1;
133  }
134  
135  
136  /*
137   * hdestroy()
138   */
139  
140  /*
141   * After using the hash table it has to be destroyed. The used memory can
142   * be freed and the local static variable can be marked as not used.
143   */
144  
145  void hdestroy_r(struct hsearch_data *htab)
146  {
147  	int i;
148  
149  	/* Test for correct arguments.  */
150  	if (htab == NULL) {
151  		__set_errno(EINVAL);
152  		return;
153  	}
154  
155  	/* free used memory */
156  	for (i = 1; i <= htab->size; ++i) {
157  		if (htab->table[i].used > 0) {
158  			ENTRY *ep = &htab->table[i].entry;
159  
160  			free((void *)ep->key);
161  			free(ep->data);
162  		}
163  	}
164  	free(htab->table);
165  
166  	/* the sign for an existing table is an value != NULL in htable */
167  	htab->table = NULL;
168  }
169  
170  /*
171   * hsearch()
172   */
173  
/*
 * This is the search function. It uses double hashing with open addressing.
 * The argument item.key has to be a pointer to a zero-terminated string
 * of chars. The function for generating a number from the string is
 * simple but fast. It can be replaced by a more complex function
 * like ajw (see [Aho,Sethi,Ullman]) if the need arises.
 *
 * We use a trick to speed up the lookup. The table is created by hcreate
 * with one more element available. This enables us to treat index zero
 * specially. This index will never be used because we store the first hash
 * index in the field "used", where zero means not used. Every other value
 * means used. The "used" field can serve as a first fast comparison for
 * equality of the stored and the parameter value. This helps to prevent
 * unnecessary, expensive calls of strcmp.
188   *
189   * This implementation differs from the standard library version of
190   * this function in a number of ways:
191   *
192   * - While the standard version does not make any assumptions about
193   *   the type of the stored data objects at all, this implementation
194   *   works with NUL terminated strings only.
195   * - Instead of storing just pointers to the original objects, we
196   *   create local copies so the caller does not need to care about the
197   *   data any more.
198   * - The standard implementation does not provide a way to update an
199   *   existing entry.  This version will create a new entry or update an
200   *   existing one when both "action == ENTER" and "item.data != NULL".
201   * - Instead of returning 1 on success, we return the index into the
202   *   internal hash table, which is also guaranteed to be positive.
203   *   This allows us direct access to the found hash table slot for
204   *   example for functions like hdelete().
205   */
206  
207  /*
208   * hstrstr_r - return index to entry whose key and/or data contains match
209   */
210  int hstrstr_r(const char *match, int last_idx, ENTRY ** retval,
211  	      struct hsearch_data *htab)
212  {
213  	unsigned int idx;
214  
215  	for (idx = last_idx + 1; idx < htab->size; ++idx) {
216  		if (htab->table[idx].used <= 0)
217  			continue;
218  		if (strstr(htab->table[idx].entry.key, match) ||
219  		    strstr(htab->table[idx].entry.data, match)) {
220  			*retval = &htab->table[idx].entry;
221  			return idx;
222  		}
223  	}
224  
225  	__set_errno(ESRCH);
226  	*retval = NULL;
227  	return 0;
228  }
229  
230  int hmatch_r(const char *match, int last_idx, ENTRY ** retval,
231  	     struct hsearch_data *htab)
232  {
233  	unsigned int idx;
234  	size_t key_len = strlen(match);
235  
236  	for (idx = last_idx + 1; idx < htab->size; ++idx) {
237  		if (htab->table[idx].used <= 0)
238  			continue;
239  		if (!strncmp(match, htab->table[idx].entry.key, key_len)) {
240  			*retval = &htab->table[idx].entry;
241  			return idx;
242  		}
243  	}
244  
245  	__set_errno(ESRCH);
246  	*retval = NULL;
247  	return 0;
248  }
249  
250  int hsearch_r(ENTRY item, ACTION action, ENTRY ** retval,
251  	      struct hsearch_data *htab)
252  {
253  	unsigned int hval;
254  	unsigned int count;
255  	unsigned int len = strlen(item.key);
256  	unsigned int idx;
257  	unsigned int first_deleted = 0;
258  
259  	/* Compute an value for the given string. Perhaps use a better method. */
260  	hval = len;
261  	count = len;
262  	while (count-- > 0) {
263  		hval <<= 4;
264  		hval += item.key[count];
265  	}
266  
267  	/*
268  	 * First hash function:
269  	 * simply take the modul but prevent zero.
270  	 */
271  	hval %= htab->size;
272  	if (hval == 0)
273  		++hval;
274  
275  	/* The first index tried. */
276  	idx = hval;
277  
278  	if (htab->table[idx].used) {
279  		/*
280  		 * Further action might be required according to the
281  		 * action value.
282  		 */
283  		unsigned hval2;
284  
285  		if (htab->table[idx].used == -1
286  		    && !first_deleted)
287  			first_deleted = idx;
288  
289  		if (htab->table[idx].used == hval
290  		    && strcmp(item.key, htab->table[idx].entry.key) == 0) {
291  			/* Overwrite existing value? */
292  			if ((action == ENTER) && (item.data != NULL)) {
293  				free(htab->table[idx].entry.data);
294  				htab->table[idx].entry.data =
295  					strdup(item.data);
296  				if (!htab->table[idx].entry.data) {
297  					__set_errno(ENOMEM);
298  					*retval = NULL;
299  					return 0;
300  				}
301  			}
302  			/* return found entry */
303  			*retval = &htab->table[idx].entry;
304  			return idx;
305  		}
306  
307  		/*
308  		 * Second hash function:
309  		 * as suggested in [Knuth]
310  		 */
311  		hval2 = 1 + hval % (htab->size - 2);
312  
313  		do {
314  			/*
315  			 * Because SIZE is prime this guarantees to
316  			 * step through all available indices.
317  			 */
318  			if (idx <= hval2)
319  				idx = htab->size + idx - hval2;
320  			else
321  				idx -= hval2;
322  
323  			/*
324  			 * If we visited all entries leave the loop
325  			 * unsuccessfully.
326  			 */
327  			if (idx == hval)
328  				break;
329  
330  			/* If entry is found use it. */
331  			if ((htab->table[idx].used == hval)
332  			    && strcmp(item.key, htab->table[idx].entry.key) == 0) {
333  				/* Overwrite existing value? */
334  				if ((action == ENTER) && (item.data != NULL)) {
335  					free(htab->table[idx].entry.data);
336  					htab->table[idx].entry.data =
337  						strdup(item.data);
338  					if (!htab->table[idx].entry.data) {
339  						__set_errno(ENOMEM);
340  						*retval = NULL;
341  						return 0;
342  					}
343  				}
344  				/* return found entry */
345  				*retval = &htab->table[idx].entry;
346  				return idx;
347  			}
348  		}
349  		while (htab->table[idx].used);
350  	}
351  
352  	/* An empty bucket has been found. */
353  	if (action == ENTER) {
354  		/*
355  		 * If table is full and another entry should be
356  		 * entered return with error.
357  		 */
358  		if (htab->filled == htab->size) {
359  			__set_errno(ENOMEM);
360  			*retval = NULL;
361  			return 0;
362  		}
363  
364  		/*
365  		 * Create new entry;
366  		 * create copies of item.key and item.data
367  		 */
368  		if (first_deleted)
369  			idx = first_deleted;
370  
371  		htab->table[idx].used = hval;
372  		htab->table[idx].entry.key = strdup(item.key);
373  		htab->table[idx].entry.data = strdup(item.data);
374  		if (!htab->table[idx].entry.key ||
375  		    !htab->table[idx].entry.data) {
376  			__set_errno(ENOMEM);
377  			*retval = NULL;
378  			return 0;
379  		}
380  
381  		++htab->filled;
382  
383  		/* return new entry */
384  		*retval = &htab->table[idx].entry;
385  		return 1;
386  	}
387  
388  	__set_errno(ESRCH);
389  	*retval = NULL;
390  	return 0;
391  }
392  
393  
394  /*
395   * hdelete()
396   */
397  
398  /*
399   * The standard implementation of hsearch(3) does not provide any way
400   * to delete any entries from the hash table.  We extend the code to
401   * do that.
402   */
403  
404  int hdelete_r(const char *key, struct hsearch_data *htab)
405  {
406  	ENTRY e, *ep;
407  	int idx;
408  
409  	debug("hdelete: DELETE key \"%s\"\n", key);
410  
411  	e.key = (char *)key;
412  
413  	if ((idx = hsearch_r(e, FIND, &ep, htab)) == 0) {
414  		__set_errno(ESRCH);
415  		return 0;	/* not found */
416  	}
417  
418  	/* free used ENTRY */
419  	debug("hdelete: DELETING key \"%s\"\n", key);
420  
421  	free((void *)ep->key);
422  	free(ep->data);
423  	htab->table[idx].used = -1;
424  
425  	--htab->filled;
426  
427  	return 1;
428  }
429  
430  /*
431   * hexport()
432   */
433  
434  /*
435   * Export the data stored in the hash table in linearized form.
436   *
 * Entries are exported as "name=value" strings, separated by an
 * arbitrary separator character. This allows this function to be
 * used both when formatting the U-Boot environment for
 * external storage (using '\0' as separator), and for
 * the "printenv" command to print all variables, simply by using
 * '\n' as separator. This can also be used for new features like
 * exporting the environment data as a text file, including the option
 * for later re-import.
445   *
446   * The entries in the result list will be sorted by ascending key
447   * values.
448   *
 * If the separator character is different from NUL, then any
 * separator characters and backslash characters in the values will
 * be escaped by a preceding backslash in the output. This is needed, for
 * example, to enable multi-line values, especially when the output
 * shall later be parsed (for example, for re-import).
454   *
455   * There are several options how the result buffer is handled:
456   *
457   * *resp  size
458   * -----------
459   *  NULL    0	A string of sufficient length will be allocated.
460   *  NULL   >0	A string of the size given will be
461   *		allocated. An error will be returned if the size is
462   *		not sufficient.  Any unused bytes in the string will
463   *		be '\0'-padded.
464   * !NULL    0	The user-supplied buffer will be used. No length
465   *		checking will be performed, i. e. it is assumed that
466   *		the buffer size will always be big enough. DANGEROUS.
467   * !NULL   >0	The user-supplied buffer will be used. An error will
468   *		be returned if the size is not sufficient.  Any unused
469   *		bytes in the string will be '\0'-padded.
470   */
471  
472  static int cmpkey(const void *p1, const void *p2)
473  {
474  	ENTRY *e1 = *(ENTRY **) p1;
475  	ENTRY *e2 = *(ENTRY **) p2;
476  
477  	return (strcmp(e1->key, e2->key));
478  }
479  
480  ssize_t hexport_r(struct hsearch_data *htab, const char sep,
481  		 char **resp, size_t size,
482  		 int argc, char * const argv[])
483  {
484  	ENTRY *list[htab->size];
485  	char *res, *p;
486  	size_t totlen;
487  	int i, n;
488  
489  	/* Test for correct arguments.  */
490  	if ((resp == NULL) || (htab == NULL)) {
491  		__set_errno(EINVAL);
492  		return (-1);
493  	}
494  
495  	debug("EXPORT  table = %p, htab.size = %d, htab.filled = %d, "
496  		"size = %zu\n", htab, htab->size, htab->filled, size);
497  	/*
498  	 * Pass 1:
499  	 * search used entries,
500  	 * save addresses and compute total length
501  	 */
502  	for (i = 1, n = 0, totlen = 0; i <= htab->size; ++i) {
503  
504  		if (htab->table[i].used > 0) {
505  			ENTRY *ep = &htab->table[i].entry;
506  			int arg, found = 0;
507  
508  			for (arg = 0; arg < argc; ++arg) {
509  				if (strcmp(argv[arg], ep->key) == 0) {
510  					found = 1;
511  					break;
512  				}
513  			}
514  			if ((argc > 0) && (found == 0))
515  				continue;
516  
517  			list[n++] = ep;
518  
519  			totlen += strlen(ep->key) + 2;
520  
521  			if (sep == '\0') {
522  				totlen += strlen(ep->data);
523  			} else {	/* check if escapes are needed */
524  				char *s = ep->data;
525  
526  				while (*s) {
527  					++totlen;
528  					/* add room for needed escape chars */
529  					if ((*s == sep) || (*s == '\\'))
530  						++totlen;
531  					++s;
532  				}
533  			}
534  			totlen += 2;	/* for '=' and 'sep' char */
535  		}
536  	}
537  
538  #ifdef DEBUG
539  	/* Pass 1a: print unsorted list */
540  	printf("Unsorted: n=%d\n", n);
541  	for (i = 0; i < n; ++i) {
542  		printf("\t%3d: %p ==> %-10s => %s\n",
543  		       i, list[i], list[i]->key, list[i]->data);
544  	}
545  #endif
546  
547  	/* Sort list by keys */
548  	qsort(list, n, sizeof(ENTRY *), cmpkey);
549  
550  	/* Check if the user supplied buffer size is sufficient */
551  	if (size) {
552  		if (size < totlen + 1) {	/* provided buffer too small */
553  			printf("Env export buffer too small: %zu, "
554  				"but need %zu\n", size, totlen + 1);
555  			__set_errno(ENOMEM);
556  			return (-1);
557  		}
558  	} else {
559  		size = totlen + 1;
560  	}
561  
562  	/* Check if the user provided a buffer */
563  	if (*resp) {
564  		/* yes; clear it */
565  		res = *resp;
566  		memset(res, '\0', size);
567  	} else {
568  		/* no, allocate and clear one */
569  		*resp = res = calloc(1, size);
570  		if (res == NULL) {
571  			__set_errno(ENOMEM);
572  			return (-1);
573  		}
574  	}
575  	/*
576  	 * Pass 2:
577  	 * export sorted list of result data
578  	 */
579  	for (i = 0, p = res; i < n; ++i) {
580  		const char *s;
581  
582  		s = list[i]->key;
583  		while (*s)
584  			*p++ = *s++;
585  		*p++ = '=';
586  
587  		s = list[i]->data;
588  
589  		while (*s) {
590  			if ((*s == sep) || (*s == '\\'))
591  				*p++ = '\\';	/* escape */
592  			*p++ = *s++;
593  		}
594  		*p++ = sep;
595  	}
596  	*p = '\0';		/* terminate result */
597  
598  	return size;
599  }
600  
601  
602  /*
603   * himport()
604   */
605  
606  /*
607   * Import linearized data into hash table.
608   *
609   * This is the inverse function to hexport(): it takes a linear list
610   * of "name=value" pairs and creates hash table entries from it.
611   *
612   * Entries without "value", i. e. consisting of only "name" or
613   * "name=", will cause this entry to be deleted from the hash table.
614   *
615   * The "flag" argument can be used to control the behaviour: when the
616   * H_NOCLEAR bit is set, then an existing hash table will kept, i. e.
617   * new data will be added to an existing hash table; otherwise, old
618   * data will be discarded and a new hash table will be created.
619   *
620   * The separator character for the "name=value" pairs can be selected,
621   * so we both support importing from externally stored environment
622   * data (separated by NUL characters) and from plain text files
623   * (entries separated by newline characters).
624   *
625   * To allow for nicely formatted text input, leading white space
626   * (sequences of SPACE and TAB chars) is ignored, and entries starting
627   * (after removal of any leading white space) with a '#' character are
628   * considered comments and ignored.
629   *
630   * [NOTE: this means that a variable name cannot start with a '#'
631   * character.]
632   *
633   * When using a non-NUL separator character, backslash is used as
634   * escape character in the value part, allowing for example for
635   * multi-line values.
636   *
637   * In theory, arbitrary separator characters can be used, but only
638   * '\0' and '\n' have really been tested.
639   */
640  
641  int himport_r(struct hsearch_data *htab,
642  	      const char *env, size_t size, const char sep, int flag)
643  {
644  	char *data, *sp, *dp, *name, *value;
645  
646  	/* Test for correct arguments.  */
647  	if (htab == NULL) {
648  		__set_errno(EINVAL);
649  		return 0;
650  	}
651  
652  	/* we allocate new space to make sure we can write to the array */
653  	if ((data = malloc(size)) == NULL) {
654  		debug("himport_r: can't malloc %zu bytes\n", size);
655  		__set_errno(ENOMEM);
656  		return 0;
657  	}
658  	memcpy(data, env, size);
659  	dp = data;
660  
661  	if ((flag & H_NOCLEAR) == 0) {
662  		/* Destroy old hash table if one exists */
663  		debug("Destroy Hash Table: %p table = %p\n", htab,
664  		       htab->table);
665  		if (htab->table)
666  			hdestroy_r(htab);
667  	}
668  
669  	/*
670  	 * Create new hash table (if needed).  The computation of the hash
671  	 * table size is based on heuristics: in a sample of some 70+
672  	 * existing systems we found an average size of 39+ bytes per entry
673  	 * in the environment (for the whole key=value pair). Assuming a
674  	 * size of 8 per entry (= safety factor of ~5) should provide enough
675  	 * safety margin for any existing environment definitions and still
676  	 * allow for more than enough dynamic additions. Note that the
677  	 * "size" argument is supposed to give the maximum enviroment size
678  	 * (CONFIG_ENV_SIZE).  This heuristics will result in
679  	 * unreasonably large numbers (and thus memory footprint) for
680  	 * big flash environments (>8,000 entries for 64 KB
681  	 * envrionment size), so we clip it to a reasonable value.
682  	 * On the other hand we need to add some more entries for free
683  	 * space when importing very small buffers. Both boundaries can
684  	 * be overwritten in the board config file if needed.
685  	 */
686  
687  	if (!htab->table) {
688  		int nent = CONFIG_ENV_MIN_ENTRIES + size / 8;
689  
690  		if (nent > CONFIG_ENV_MAX_ENTRIES)
691  			nent = CONFIG_ENV_MAX_ENTRIES;
692  
693  		debug("Create Hash Table: N=%d\n", nent);
694  
695  		if (hcreate_r(nent, htab) == 0) {
696  			free(data);
697  			return 0;
698  		}
699  	}
700  
701  	/* Parse environment; allow for '\0' and 'sep' as separators */
702  	do {
703  		ENTRY e, *rv;
704  
705  		/* skip leading white space */
706  		while (isblank(*dp))
707  			++dp;
708  
709  		/* skip comment lines */
710  		if (*dp == '#') {
711  			while (*dp && (*dp != sep))
712  				++dp;
713  			++dp;
714  			continue;
715  		}
716  
717  		/* parse name */
718  		for (name = dp; *dp != '=' && *dp && *dp != sep; ++dp)
719  			;
720  
721  		/* deal with "name" and "name=" entries (delete var) */
722  		if (*dp == '\0' || *(dp + 1) == '\0' ||
723  		    *dp == sep || *(dp + 1) == sep) {
724  			if (*dp == '=')
725  				*dp++ = '\0';
726  			*dp++ = '\0';	/* terminate name */
727  
728  			debug("DELETE CANDIDATE: \"%s\"\n", name);
729  
730  			if (hdelete_r(name, htab) == 0)
731  				debug("DELETE ERROR ##############################\n");
732  
733  			continue;
734  		}
735  		*dp++ = '\0';	/* terminate name */
736  
737  		/* parse value; deal with escapes */
738  		for (value = sp = dp; *dp && (*dp != sep); ++dp) {
739  			if ((*dp == '\\') && *(dp + 1))
740  				++dp;
741  			*sp++ = *dp;
742  		}
743  		*sp++ = '\0';	/* terminate value */
744  		++dp;
745  
746  		/* enter into hash table */
747  		e.key = name;
748  		e.data = value;
749  
750  		hsearch_r(e, ENTER, &rv, htab);
751  		if (rv == NULL) {
752  			printf("himport_r: can't insert \"%s=%s\" into hash table\n",
753  				name, value);
754  			return 0;
755  		}
756  
757  		debug("INSERT: table %p, filled %d/%d rv %p ==> name=\"%s\" value=\"%s\"\n",
758  			htab, htab->filled, htab->size,
759  			rv, name, value);
760  	} while ((dp < data + size) && *dp);	/* size check needed for text */
761  						/* without '\0' termination */
762  	debug("INSERT: free(data = %p)\n", data);
763  	free(data);
764  
765  	debug("INSERT: done\n");
766  	return 1;		/* everything OK */
767  }
768