1 /* 2 * lib/ts_kmp.c Knuth-Morris-Pratt text search implementation 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU General Public License 6 * as published by the Free Software Foundation; either version 7 * 2 of the License, or (at your option) any later version. 8 * 9 * Authors: Thomas Graf <tgraf@suug.ch> 10 * 11 * ========================================================================== 12 * 13 * Implements a linear-time string-matching algorithm due to Knuth, 14 * Morris, and Pratt [1]. Their algorithm avoids the explicit 15 * computation of the transition function DELTA altogether. Its 16 * matching time is O(n), for n being length(text), using just an 17 * auxiliary function PI[1..m], for m being length(pattern), 18 * precomputed from the pattern in time O(m). The array PI allows 19 * the transition function DELTA to be computed efficiently 20 * "on the fly" as needed. Roughly speaking, for any state 21 * "q" = 0,1,...,m and any character "a" in SIGMA, the value 22 * PI["q"] contains the information that is independent of "a" and 23 * is needed to compute DELTA("q", "a") [2]. Since the array PI 24 * has only m entries, whereas DELTA has O(m|SIGMA|) entries, we 25 * save a factor of |SIGMA| in the preprocessing time by computing 26 * PI rather than DELTA. 27 * 28 * [1] Cormen, Leiserson, Rivest, Stein 29 * Introdcution to Algorithms, 2nd Edition, MIT Press 30 * [2] See finite automation theory 31 */ 32 33 #include <linux/module.h> 34 #include <linux/types.h> 35 #include <linux/string.h> 36 #include <linux/textsearch.h> 37 38 struct ts_kmp 39 { 40 u8 * pattern; 41 unsigned int pattern_len; 42 unsigned int prefix_tbl[0]; 43 }; 44 45 static unsigned int kmp_find(struct ts_config *conf, struct ts_state *state) 46 { 47 struct ts_kmp *kmp = ts_config_priv(conf); 48 unsigned int i, q = 0, text_len, consumed = state->offset; 49 const u8 *text; 50 51 for (;;) { 52 text_len = conf->get_next_block(consumed, &text, conf, state); 53 54 if (unlikely(text_len == 0)) 55 break; 56 57 for (i = 0; i < text_len; i++) { 58 while (q > 0 && kmp->pattern[q] != text[i]) 59 q = kmp->prefix_tbl[q - 1]; 60 if (kmp->pattern[q] == text[i]) 61 q++; 62 if (unlikely(q == kmp->pattern_len)) { 63 state->offset = consumed + i + 1; 64 return state->offset - kmp->pattern_len; 65 } 66 } 67 68 consumed += text_len; 69 } 70 71 return UINT_MAX; 72 } 73 74 static inline void compute_prefix_tbl(const u8 *pattern, unsigned int len, 75 unsigned int *prefix_tbl) 76 { 77 unsigned int k, q; 78 79 for (k = 0, q = 1; q < len; q++) { 80 while (k > 0 && pattern[k] != pattern[q]) 81 k = prefix_tbl[k-1]; 82 if (pattern[k] == pattern[q]) 83 k++; 84 prefix_tbl[q] = k; 85 } 86 } 87 88 static struct ts_config *kmp_init(const void *pattern, unsigned int len, 89 gfp_t gfp_mask) 90 { 91 struct ts_config *conf; 92 struct ts_kmp *kmp; 93 unsigned int prefix_tbl_len = len * sizeof(unsigned int); 94 size_t priv_size = sizeof(*kmp) + len + prefix_tbl_len; 95 96 conf = alloc_ts_config(priv_size, gfp_mask); 97 if (IS_ERR(conf)) 98 return conf; 99 100 kmp = ts_config_priv(conf); 101 kmp->pattern_len = len; 102 compute_prefix_tbl(pattern, len, kmp->prefix_tbl); 103 kmp->pattern = (u8 *) kmp->prefix_tbl + prefix_tbl_len; 104 memcpy(kmp->pattern, pattern, len); 105 106 return conf; 107 } 108 109 static void *kmp_get_pattern(struct ts_config *conf) 110 { 111 struct ts_kmp *kmp = ts_config_priv(conf); 112 return kmp->pattern; 113 } 114 115 static unsigned int kmp_get_pattern_len(struct ts_config *conf) 116 { 117 struct ts_kmp *kmp = ts_config_priv(conf); 118 return kmp->pattern_len; 119 } 120 121 static struct ts_ops kmp_ops = { 122 .name = "kmp", 123 .find = kmp_find, 124 .init = kmp_init, 125 .get_pattern = kmp_get_pattern, 126 .get_pattern_len = kmp_get_pattern_len, 127 .owner = THIS_MODULE, 128 .list = LIST_HEAD_INIT(kmp_ops.list) 129 }; 130 131 static int __init init_kmp(void) 132 { 133 return textsearch_register(&kmp_ops); 134 } 135 136 static void __exit exit_kmp(void) 137 { 138 textsearch_unregister(&kmp_ops); 139 } 140 141 MODULE_LICENSE("GPL"); 142 143 module_init(init_kmp); 144 module_exit(exit_kmp); 145