xref: /openbmc/linux/lib/ts_kmp.c (revision 09b35b41)
1  // SPDX-License-Identifier: GPL-2.0-or-later
2  /*
3   * lib/ts_kmp.c		Knuth-Morris-Pratt text search implementation
4   *
5   * Authors:	Thomas Graf <tgraf@suug.ch>
6   *
7   * ==========================================================================
8   *
9   *   Implements a linear-time string-matching algorithm due to Knuth,
10   *   Morris, and Pratt [1]. Their algorithm avoids the explicit
11   *   computation of the transition function DELTA altogether. Its
12   *   matching time is O(n), for n being length(text), using just an
13   *   auxiliary function PI[1..m], for m being length(pattern),
14   *   precomputed from the pattern in time O(m). The array PI allows
15   *   the transition function DELTA to be computed efficiently
16   *   "on the fly" as needed. Roughly speaking, for any state
17   *   "q" = 0,1,...,m and any character "a" in SIGMA, the value
18   *   PI["q"] contains the information that is independent of "a" and
19   *   is needed to compute DELTA("q", "a") [2]. Since the array PI
20   *   has only m entries, whereas DELTA has O(m|SIGMA|) entries, we
21   *   save a factor of |SIGMA| in the preprocessing time by computing
22   *   PI rather than DELTA.
23   *
24   *   [1] Cormen, Leiserson, Rivest, Stein
 *       Introduction to Algorithms, 2nd Edition, MIT Press
26   *   [2] See finite automaton theory
27   */
28  
29  #include <linux/module.h>
30  #include <linux/types.h>
31  #include <linux/string.h>
32  #include <linux/ctype.h>
33  #include <linux/textsearch.h>
34  
35  struct ts_kmp
36  {
37  	u8 *		pattern;
38  	unsigned int	pattern_len;
39  	unsigned int 	prefix_tbl[0];
40  };
41  
42  static unsigned int kmp_find(struct ts_config *conf, struct ts_state *state)
43  {
44  	struct ts_kmp *kmp = ts_config_priv(conf);
45  	unsigned int i, q = 0, text_len, consumed = state->offset;
46  	const u8 *text;
47  	const int icase = conf->flags & TS_IGNORECASE;
48  
49  	for (;;) {
50  		text_len = conf->get_next_block(consumed, &text, conf, state);
51  
52  		if (unlikely(text_len == 0))
53  			break;
54  
55  		for (i = 0; i < text_len; i++) {
56  			while (q > 0 && kmp->pattern[q]
57  			    != (icase ? toupper(text[i]) : text[i]))
58  				q = kmp->prefix_tbl[q - 1];
59  			if (kmp->pattern[q]
60  			    == (icase ? toupper(text[i]) : text[i]))
61  				q++;
62  			if (unlikely(q == kmp->pattern_len)) {
63  				state->offset = consumed + i + 1;
64  				return state->offset - kmp->pattern_len;
65  			}
66  		}
67  
68  		consumed += text_len;
69  	}
70  
71  	return UINT_MAX;
72  }
73  
74  static inline void compute_prefix_tbl(const u8 *pattern, unsigned int len,
75  				      unsigned int *prefix_tbl, int flags)
76  {
77  	unsigned int k, q;
78  	const u8 icase = flags & TS_IGNORECASE;
79  
80  	for (k = 0, q = 1; q < len; q++) {
81  		while (k > 0 && (icase ? toupper(pattern[k]) : pattern[k])
82  		    != (icase ? toupper(pattern[q]) : pattern[q]))
83  			k = prefix_tbl[k-1];
84  		if ((icase ? toupper(pattern[k]) : pattern[k])
85  		    == (icase ? toupper(pattern[q]) : pattern[q]))
86  			k++;
87  		prefix_tbl[q] = k;
88  	}
89  }
90  
91  static struct ts_config *kmp_init(const void *pattern, unsigned int len,
92  				  gfp_t gfp_mask, int flags)
93  {
94  	struct ts_config *conf;
95  	struct ts_kmp *kmp;
96  	int i;
97  	unsigned int prefix_tbl_len = len * sizeof(unsigned int);
98  	size_t priv_size = sizeof(*kmp) + len + prefix_tbl_len;
99  
100  	conf = alloc_ts_config(priv_size, gfp_mask);
101  	if (IS_ERR(conf))
102  		return conf;
103  
104  	conf->flags = flags;
105  	kmp = ts_config_priv(conf);
106  	kmp->pattern_len = len;
107  	compute_prefix_tbl(pattern, len, kmp->prefix_tbl, flags);
108  	kmp->pattern = (u8 *) kmp->prefix_tbl + prefix_tbl_len;
109  	if (flags & TS_IGNORECASE)
110  		for (i = 0; i < len; i++)
111  			kmp->pattern[i] = toupper(((u8 *)pattern)[i]);
112  	else
113  		memcpy(kmp->pattern, pattern, len);
114  
115  	return conf;
116  }
117  
118  static void *kmp_get_pattern(struct ts_config *conf)
119  {
120  	struct ts_kmp *kmp = ts_config_priv(conf);
121  	return kmp->pattern;
122  }
123  
124  static unsigned int kmp_get_pattern_len(struct ts_config *conf)
125  {
126  	struct ts_kmp *kmp = ts_config_priv(conf);
127  	return kmp->pattern_len;
128  }
129  
130  static struct ts_ops kmp_ops = {
131  	.name		  = "kmp",
132  	.find		  = kmp_find,
133  	.init		  = kmp_init,
134  	.get_pattern	  = kmp_get_pattern,
135  	.get_pattern_len  = kmp_get_pattern_len,
136  	.owner		  = THIS_MODULE,
137  	.list		  = LIST_HEAD_INIT(kmp_ops.list)
138  };
139  
140  static int __init init_kmp(void)
141  {
142  	return textsearch_register(&kmp_ops);
143  }
144  
145  static void __exit exit_kmp(void)
146  {
147  	textsearch_unregister(&kmp_ops);
148  }
149  
150  MODULE_LICENSE("GPL");
151  
152  module_init(init_kmp);
153  module_exit(exit_kmp);
154