1df3fb93aSThomas Graf /* 2df3fb93aSThomas Graf * lib/ts_kmp.c Knuth-Morris-Pratt text search implementation 3df3fb93aSThomas Graf * 4df3fb93aSThomas Graf * This program is free software; you can redistribute it and/or 5df3fb93aSThomas Graf * modify it under the terms of the GNU General Public License 6df3fb93aSThomas Graf * as published by the Free Software Foundation; either version 7df3fb93aSThomas Graf * 2 of the License, or (at your option) any later version. 8df3fb93aSThomas Graf * 9df3fb93aSThomas Graf * Authors: Thomas Graf <tgraf@suug.ch> 10df3fb93aSThomas Graf * 11df3fb93aSThomas Graf * ========================================================================== 12df3fb93aSThomas Graf * 13df3fb93aSThomas Graf * Implements a linear-time string-matching algorithm due to Knuth, 14df3fb93aSThomas Graf * Morris, and Pratt [1]. Their algorithm avoids the explicit 15df3fb93aSThomas Graf * computation of the transition function DELTA altogether. Its 16df3fb93aSThomas Graf * matching time is O(n), for n being length(text), using just an 17df3fb93aSThomas Graf * auxiliary function PI[1..m], for m being length(pattern), 18df3fb93aSThomas Graf * precomputed from the pattern in time O(m). The array PI allows 19df3fb93aSThomas Graf * the transition function DELTA to be computed efficiently 20df3fb93aSThomas Graf * "on the fly" as needed. Roughly speaking, for any state 21df3fb93aSThomas Graf * "q" = 0,1,...,m and any character "a" in SIGMA, the value 22df3fb93aSThomas Graf * PI["q"] contains the information that is independent of "a" and 23df3fb93aSThomas Graf * is needed to compute DELTA("q", "a") [2]. Since the array PI 24df3fb93aSThomas Graf * has only m entries, whereas DELTA has O(m|SIGMA|) entries, we 25df3fb93aSThomas Graf * save a factor of |SIGMA| in the preprocessing time by computing 26df3fb93aSThomas Graf * PI rather than DELTA. 27df3fb93aSThomas Graf * 28df3fb93aSThomas Graf * [1] Cormen, Leiserson, Rivest, Stein 29df3fb93aSThomas Graf * Introdcution to Algorithms, 2nd Edition, MIT Press 30*7433a8d6SRandy Dunlap * [2] See finite automaton theory 31df3fb93aSThomas Graf */ 32df3fb93aSThomas Graf 33df3fb93aSThomas Graf #include <linux/module.h> 34df3fb93aSThomas Graf #include <linux/types.h> 35df3fb93aSThomas Graf #include <linux/string.h> 362523c3fcSJoonwoo Park #include <linux/ctype.h> 37df3fb93aSThomas Graf #include <linux/textsearch.h> 38df3fb93aSThomas Graf 39df3fb93aSThomas Graf struct ts_kmp 40df3fb93aSThomas Graf { 41df3fb93aSThomas Graf u8 * pattern; 42df3fb93aSThomas Graf unsigned int pattern_len; 43df3fb93aSThomas Graf unsigned int prefix_tbl[0]; 44df3fb93aSThomas Graf }; 45df3fb93aSThomas Graf 46df3fb93aSThomas Graf static unsigned int kmp_find(struct ts_config *conf, struct ts_state *state) 47df3fb93aSThomas Graf { 48df3fb93aSThomas Graf struct ts_kmp *kmp = ts_config_priv(conf); 49df3fb93aSThomas Graf unsigned int i, q = 0, text_len, consumed = state->offset; 50df3fb93aSThomas Graf const u8 *text; 512523c3fcSJoonwoo Park const int icase = conf->flags & TS_IGNORECASE; 52df3fb93aSThomas Graf 53df3fb93aSThomas Graf for (;;) { 54df3fb93aSThomas Graf text_len = conf->get_next_block(consumed, &text, conf, state); 55df3fb93aSThomas Graf 56df3fb93aSThomas Graf if (unlikely(text_len == 0)) 57df3fb93aSThomas Graf break; 58df3fb93aSThomas Graf 59df3fb93aSThomas Graf for (i = 0; i < text_len; i++) { 602523c3fcSJoonwoo Park while (q > 0 && kmp->pattern[q] 612523c3fcSJoonwoo Park != (icase ? toupper(text[i]) : text[i])) 62df3fb93aSThomas Graf q = kmp->prefix_tbl[q - 1]; 632523c3fcSJoonwoo Park if (kmp->pattern[q] 642523c3fcSJoonwoo Park == (icase ? toupper(text[i]) : text[i])) 65df3fb93aSThomas Graf q++; 66df3fb93aSThomas Graf if (unlikely(q == kmp->pattern_len)) { 67df3fb93aSThomas Graf state->offset = consumed + i + 1; 68df3fb93aSThomas Graf return state->offset - kmp->pattern_len; 69df3fb93aSThomas Graf } 70df3fb93aSThomas Graf } 71df3fb93aSThomas Graf 72df3fb93aSThomas Graf consumed += text_len; 73df3fb93aSThomas Graf } 74df3fb93aSThomas Graf 75df3fb93aSThomas Graf return UINT_MAX; 76df3fb93aSThomas Graf } 77df3fb93aSThomas Graf 78df3fb93aSThomas Graf static inline void compute_prefix_tbl(const u8 *pattern, unsigned int len, 792523c3fcSJoonwoo Park unsigned int *prefix_tbl, int flags) 80df3fb93aSThomas Graf { 81df3fb93aSThomas Graf unsigned int k, q; 822523c3fcSJoonwoo Park const u8 icase = flags & TS_IGNORECASE; 83df3fb93aSThomas Graf 84df3fb93aSThomas Graf for (k = 0, q = 1; q < len; q++) { 852523c3fcSJoonwoo Park while (k > 0 && (icase ? toupper(pattern[k]) : pattern[k]) 862523c3fcSJoonwoo Park != (icase ? toupper(pattern[q]) : pattern[q])) 87df3fb93aSThomas Graf k = prefix_tbl[k-1]; 882523c3fcSJoonwoo Park if ((icase ? toupper(pattern[k]) : pattern[k]) 892523c3fcSJoonwoo Park == (icase ? toupper(pattern[q]) : pattern[q])) 90df3fb93aSThomas Graf k++; 91df3fb93aSThomas Graf prefix_tbl[q] = k; 92df3fb93aSThomas Graf } 93df3fb93aSThomas Graf } 94df3fb93aSThomas Graf 95df3fb93aSThomas Graf static struct ts_config *kmp_init(const void *pattern, unsigned int len, 962523c3fcSJoonwoo Park gfp_t gfp_mask, int flags) 97df3fb93aSThomas Graf { 98df3fb93aSThomas Graf struct ts_config *conf; 99df3fb93aSThomas Graf struct ts_kmp *kmp; 1002523c3fcSJoonwoo Park int i; 101df3fb93aSThomas Graf unsigned int prefix_tbl_len = len * sizeof(unsigned int); 102df3fb93aSThomas Graf size_t priv_size = sizeof(*kmp) + len + prefix_tbl_len; 103df3fb93aSThomas Graf 104df3fb93aSThomas Graf conf = alloc_ts_config(priv_size, gfp_mask); 105df3fb93aSThomas Graf if (IS_ERR(conf)) 106df3fb93aSThomas Graf return conf; 107df3fb93aSThomas Graf 1082523c3fcSJoonwoo Park conf->flags = flags; 109df3fb93aSThomas Graf kmp = ts_config_priv(conf); 110df3fb93aSThomas Graf kmp->pattern_len = len; 1112523c3fcSJoonwoo Park compute_prefix_tbl(pattern, len, kmp->prefix_tbl, flags); 112df3fb93aSThomas Graf kmp->pattern = (u8 *) kmp->prefix_tbl + prefix_tbl_len; 1132523c3fcSJoonwoo Park if (flags & TS_IGNORECASE) 1142523c3fcSJoonwoo Park for (i = 0; i < len; i++) 1152523c3fcSJoonwoo Park kmp->pattern[i] = toupper(((u8 *)pattern)[i]); 1162523c3fcSJoonwoo Park else 117df3fb93aSThomas Graf memcpy(kmp->pattern, pattern, len); 118df3fb93aSThomas Graf 119df3fb93aSThomas Graf return conf; 120df3fb93aSThomas Graf } 121df3fb93aSThomas Graf 122df3fb93aSThomas Graf static void *kmp_get_pattern(struct ts_config *conf) 123df3fb93aSThomas Graf { 124df3fb93aSThomas Graf struct ts_kmp *kmp = ts_config_priv(conf); 125df3fb93aSThomas Graf return kmp->pattern; 126df3fb93aSThomas Graf } 127df3fb93aSThomas Graf 128df3fb93aSThomas Graf static unsigned int kmp_get_pattern_len(struct ts_config *conf) 129df3fb93aSThomas Graf { 130df3fb93aSThomas Graf struct ts_kmp *kmp = ts_config_priv(conf); 131df3fb93aSThomas Graf return kmp->pattern_len; 132df3fb93aSThomas Graf } 133df3fb93aSThomas Graf 134df3fb93aSThomas Graf static struct ts_ops kmp_ops = { 135df3fb93aSThomas Graf .name = "kmp", 136df3fb93aSThomas Graf .find = kmp_find, 137df3fb93aSThomas Graf .init = kmp_init, 138df3fb93aSThomas Graf .get_pattern = kmp_get_pattern, 139df3fb93aSThomas Graf .get_pattern_len = kmp_get_pattern_len, 140df3fb93aSThomas Graf .owner = THIS_MODULE, 141df3fb93aSThomas Graf .list = LIST_HEAD_INIT(kmp_ops.list) 142df3fb93aSThomas Graf }; 143df3fb93aSThomas Graf 144df3fb93aSThomas Graf static int __init init_kmp(void) 145df3fb93aSThomas Graf { 146df3fb93aSThomas Graf return textsearch_register(&kmp_ops); 147df3fb93aSThomas Graf } 148df3fb93aSThomas Graf 149df3fb93aSThomas Graf static void __exit exit_kmp(void) 150df3fb93aSThomas Graf { 151df3fb93aSThomas Graf textsearch_unregister(&kmp_ops); 152df3fb93aSThomas Graf } 153df3fb93aSThomas Graf 154df3fb93aSThomas Graf MODULE_LICENSE("GPL"); 155df3fb93aSThomas Graf 156df3fb93aSThomas Graf module_init(init_kmp); 157df3fb93aSThomas Graf module_exit(exit_kmp); 158