1 /* 2 * lib/ts_kmp.c Knuth-Morris-Pratt text search implementation 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU General Public License 6 * as published by the Free Software Foundation; either version 7 * 2 of the License, or (at your option) any later version. 8 * 9 * Authors: Thomas Graf <tgraf@suug.ch> 10 * 11 * ========================================================================== 12 * 13 * Implements a linear-time string-matching algorithm due to Knuth, 14 * Morris, and Pratt [1]. Their algorithm avoids the explicit 15 * computation of the transition function DELTA altogether. Its 16 * matching time is O(n), for n being length(text), using just an 17 * auxiliary function PI[1..m], for m being length(pattern), 18 * precomputed from the pattern in time O(m). The array PI allows 19 * the transition function DELTA to be computed efficiently 20 * "on the fly" as needed. Roughly speaking, for any state 21 * "q" = 0,1,...,m and any character "a" in SIGMA, the value 22 * PI["q"] contains the information that is independent of "a" and 23 * is needed to compute DELTA("q", "a") [2]. Since the array PI 24 * has only m entries, whereas DELTA has O(m|SIGMA|) entries, we 25 * save a factor of |SIGMA| in the preprocessing time by computing 26 * PI rather than DELTA. 27 * 28 * [1] Cormen, Leiserson, Rivest, Stein 29 * Introdcution to Algorithms, 2nd Edition, MIT Press 30 * [2] See finite automation theory 31 */ 32 33 #include <linux/config.h> 34 #include <linux/module.h> 35 #include <linux/types.h> 36 #include <linux/string.h> 37 #include <linux/textsearch.h> 38 39 struct ts_kmp 40 { 41 u8 * pattern; 42 unsigned int pattern_len; 43 unsigned int prefix_tbl[0]; 44 }; 45 46 static unsigned int kmp_find(struct ts_config *conf, struct ts_state *state) 47 { 48 struct ts_kmp *kmp = ts_config_priv(conf); 49 unsigned int i, q = 0, text_len, consumed = state->offset; 50 const u8 *text; 51 52 for (;;) { 53 text_len = conf->get_next_block(consumed, &text, conf, state); 54 55 if (unlikely(text_len == 0)) 56 break; 57 58 for (i = 0; i < text_len; i++) { 59 while (q > 0 && kmp->pattern[q] != text[i]) 60 q = kmp->prefix_tbl[q - 1]; 61 if (kmp->pattern[q] == text[i]) 62 q++; 63 if (unlikely(q == kmp->pattern_len)) { 64 state->offset = consumed + i + 1; 65 return state->offset - kmp->pattern_len; 66 } 67 } 68 69 consumed += text_len; 70 } 71 72 return UINT_MAX; 73 } 74 75 static inline void compute_prefix_tbl(const u8 *pattern, unsigned int len, 76 unsigned int *prefix_tbl) 77 { 78 unsigned int k, q; 79 80 for (k = 0, q = 1; q < len; q++) { 81 while (k > 0 && pattern[k] != pattern[q]) 82 k = prefix_tbl[k-1]; 83 if (pattern[k] == pattern[q]) 84 k++; 85 prefix_tbl[q] = k; 86 } 87 } 88 89 static struct ts_config *kmp_init(const void *pattern, unsigned int len, 90 gfp_t gfp_mask) 91 { 92 struct ts_config *conf; 93 struct ts_kmp *kmp; 94 unsigned int prefix_tbl_len = len * sizeof(unsigned int); 95 size_t priv_size = sizeof(*kmp) + len + prefix_tbl_len; 96 97 conf = alloc_ts_config(priv_size, gfp_mask); 98 if (IS_ERR(conf)) 99 return conf; 100 101 kmp = ts_config_priv(conf); 102 kmp->pattern_len = len; 103 compute_prefix_tbl(pattern, len, kmp->prefix_tbl); 104 kmp->pattern = (u8 *) kmp->prefix_tbl + prefix_tbl_len; 105 memcpy(kmp->pattern, pattern, len); 106 107 return conf; 108 } 109 110 static void *kmp_get_pattern(struct ts_config *conf) 111 { 112 struct ts_kmp *kmp = ts_config_priv(conf); 113 return kmp->pattern; 114 } 115 116 static unsigned int kmp_get_pattern_len(struct ts_config *conf) 117 { 118 struct ts_kmp *kmp = ts_config_priv(conf); 119 return kmp->pattern_len; 120 } 121 122 static struct ts_ops kmp_ops = { 123 .name = "kmp", 124 .find = kmp_find, 125 .init = kmp_init, 126 .get_pattern = kmp_get_pattern, 127 .get_pattern_len = kmp_get_pattern_len, 128 .owner = THIS_MODULE, 129 .list = LIST_HEAD_INIT(kmp_ops.list) 130 }; 131 132 static int __init init_kmp(void) 133 { 134 return textsearch_register(&kmp_ops); 135 } 136 137 static void __exit exit_kmp(void) 138 { 139 textsearch_unregister(&kmp_ops); 140 } 141 142 MODULE_LICENSE("GPL"); 143 144 module_init(init_kmp); 145 module_exit(exit_kmp); 146