12de4ff7bSThomas Graf /* 22de4ff7bSThomas Graf * lib/textsearch.c Generic text search interface 32de4ff7bSThomas Graf * 42de4ff7bSThomas Graf * This program is free software; you can redistribute it and/or 52de4ff7bSThomas Graf * modify it under the terms of the GNU General Public License 62de4ff7bSThomas Graf * as published by the Free Software Foundation; either version 72de4ff7bSThomas Graf * 2 of the License, or (at your option) any later version. 82de4ff7bSThomas Graf * 92de4ff7bSThomas Graf * Authors: Thomas Graf <tgraf@suug.ch> 10e03ba84aSPablo Neira Ayuso * Pablo Neira Ayuso <pablo@netfilter.org> 112de4ff7bSThomas Graf * 122de4ff7bSThomas Graf * ========================================================================== 132de4ff7bSThomas Graf * 142de4ff7bSThomas Graf * INTRODUCTION 152de4ff7bSThomas Graf * 16de0368d5SJesper Dangaard Brouer * The textsearch infrastructure provides text searching facilities for 172de4ff7bSThomas Graf * both linear and non-linear data. Individual search algorithms are 182de4ff7bSThomas Graf * implemented in modules and chosen by the user. 192de4ff7bSThomas Graf * 202de4ff7bSThomas Graf * ARCHITECTURE 212de4ff7bSThomas Graf * 222de4ff7bSThomas Graf * User 232de4ff7bSThomas Graf * +----------------+ 242de4ff7bSThomas Graf * | finish()|<--------------(6)-----------------+ 252de4ff7bSThomas Graf * |get_next_block()|<--------------(5)---------------+ | 262de4ff7bSThomas Graf * | | Algorithm | | 272de4ff7bSThomas Graf * | | +------------------------------+ 282de4ff7bSThomas Graf * | | | init() find() destroy() | 292de4ff7bSThomas Graf * | | +------------------------------+ 302de4ff7bSThomas Graf * | | Core API ^ ^ ^ 312de4ff7bSThomas Graf * | | +---------------+ (2) (4) (8) 322de4ff7bSThomas Graf * | (1)|----->| prepare() |---+ | | 332de4ff7bSThomas Graf * | (3)|----->| find()/next() |-----------+ | 342de4ff7bSThomas Graf * | (7)|----->| destroy() |----------------------+ 352de4ff7bSThomas Graf * +----------------+ +---------------+ 362de4ff7bSThomas Graf * 372de4ff7bSThomas Graf * (1) User configures a search by calling _prepare() specifying the 382de4ff7bSThomas Graf * search parameters such as the pattern and algorithm name. 392de4ff7bSThomas Graf * (2) Core requests the algorithm to allocate and initialize a search 402de4ff7bSThomas Graf * configuration according to the specified parameters. 412de4ff7bSThomas Graf * (3) User starts the search(es) by calling _find() or _next() to 422de4ff7bSThomas Graf * fetch subsequent occurrences. A state variable is provided 43d08df601SRobert P. J. Day * to the algorithm to store persistent variables. 442de4ff7bSThomas Graf * (4) Core eventually resets the search offset and forwards the find() 452de4ff7bSThomas Graf * request to the algorithm. 46de0368d5SJesper Dangaard Brouer * (5) Algorithm calls get_next_block() provided by the user continuously 472de4ff7bSThomas Graf * to fetch the data to be searched in block by block. 482de4ff7bSThomas Graf * (6) Algorithm invokes finish() after the last call to get_next_block 492de4ff7bSThomas Graf * to clean up any leftovers from get_next_block. (Optional) 502de4ff7bSThomas Graf * (7) User destroys the configuration by calling _destroy(). 512de4ff7bSThomas Graf * (8) Core notifies the algorithm to destroy algorithm specific 522de4ff7bSThomas Graf * allocations. (Optional) 532de4ff7bSThomas Graf * 542de4ff7bSThomas Graf * USAGE 552de4ff7bSThomas Graf * 562de4ff7bSThomas Graf * Before a search can be performed, a configuration must be created 57b9c79678SJoonwoo Park * by calling textsearch_prepare() specifying the searching algorithm, 58b9c79678SJoonwoo Park * the pattern to look for and flags. As a flag, you can set TS_IGNORECASE 59b9c79678SJoonwoo Park * to perform case insensitive matching. But it might slow down 60b9c79678SJoonwoo Park * performance of algorithm, so you should use it at own your risk. 61de0368d5SJesper Dangaard Brouer * The returned configuration may then be used for an arbitrary 62b9c79678SJoonwoo Park * amount of times and even in parallel as long as a separate struct 63b9c79678SJoonwoo Park * ts_state variable is provided to every instance. 642de4ff7bSThomas Graf * 652de4ff7bSThomas Graf * The actual search is performed by either calling textsearch_find_- 662de4ff7bSThomas Graf * continuous() for linear data or by providing an own get_next_block() 672de4ff7bSThomas Graf * implementation and calling textsearch_find(). Both functions return 68de0368d5SJesper Dangaard Brouer * the position of the first occurrence of the pattern or UINT_MAX if 69de0368d5SJesper Dangaard Brouer * no match was found. Subsequent occurrences can be found by calling 702de4ff7bSThomas Graf * textsearch_next() regardless of the linearity of the data. 712de4ff7bSThomas Graf * 722de4ff7bSThomas Graf * Once you're done using a configuration it must be given back via 732de4ff7bSThomas Graf * textsearch_destroy. 742de4ff7bSThomas Graf * 752de4ff7bSThomas Graf * EXAMPLE 762de4ff7bSThomas Graf * 772de4ff7bSThomas Graf * int pos; 782de4ff7bSThomas Graf * struct ts_config *conf; 792de4ff7bSThomas Graf * struct ts_state state; 802de4ff7bSThomas Graf * const char *pattern = "chicken"; 812de4ff7bSThomas Graf * const char *example = "We dance the funky chicken"; 822de4ff7bSThomas Graf * 832de4ff7bSThomas Graf * conf = textsearch_prepare("kmp", pattern, strlen(pattern), 842de4ff7bSThomas Graf * GFP_KERNEL, TS_AUTOLOAD); 852de4ff7bSThomas Graf * if (IS_ERR(conf)) { 862de4ff7bSThomas Graf * err = PTR_ERR(conf); 872de4ff7bSThomas Graf * goto errout; 882de4ff7bSThomas Graf * } 892de4ff7bSThomas Graf * 902de4ff7bSThomas Graf * pos = textsearch_find_continuous(conf, &state, example, strlen(example)); 912de4ff7bSThomas Graf * if (pos != UINT_MAX) 922de4ff7bSThomas Graf * panic("Oh my god, dancing chickens at %d\n", pos); 932de4ff7bSThomas Graf * 942de4ff7bSThomas Graf * textsearch_destroy(conf); 952de4ff7bSThomas Graf * ========================================================================== 962de4ff7bSThomas Graf */ 972de4ff7bSThomas Graf 982de4ff7bSThomas Graf #include <linux/module.h> 992de4ff7bSThomas Graf #include <linux/types.h> 1002de4ff7bSThomas Graf #include <linux/string.h> 1012de4ff7bSThomas Graf #include <linux/init.h> 10282524746SFranck Bui-Huu #include <linux/rculist.h> 1032de4ff7bSThomas Graf #include <linux/rcupdate.h> 1042de4ff7bSThomas Graf #include <linux/err.h> 1052de4ff7bSThomas Graf #include <linux/textsearch.h> 1065a0e3ad6STejun Heo #include <linux/slab.h> 1072de4ff7bSThomas Graf 1082de4ff7bSThomas Graf static LIST_HEAD(ts_ops); 1092de4ff7bSThomas Graf static DEFINE_SPINLOCK(ts_mod_lock); 1102de4ff7bSThomas Graf 1112de4ff7bSThomas Graf static inline struct ts_ops *lookup_ts_algo(const char *name) 1122de4ff7bSThomas Graf { 1132de4ff7bSThomas Graf struct ts_ops *o; 1142de4ff7bSThomas Graf 1152de4ff7bSThomas Graf rcu_read_lock(); 1162de4ff7bSThomas Graf list_for_each_entry_rcu(o, &ts_ops, list) { 1172de4ff7bSThomas Graf if (!strcmp(name, o->name)) { 1182de4ff7bSThomas Graf if (!try_module_get(o->owner)) 1192de4ff7bSThomas Graf o = NULL; 1202de4ff7bSThomas Graf rcu_read_unlock(); 1212de4ff7bSThomas Graf return o; 1222de4ff7bSThomas Graf } 1232de4ff7bSThomas Graf } 1242de4ff7bSThomas Graf rcu_read_unlock(); 1252de4ff7bSThomas Graf 1262de4ff7bSThomas Graf return NULL; 1272de4ff7bSThomas Graf } 1282de4ff7bSThomas Graf 1292de4ff7bSThomas Graf /** 1302de4ff7bSThomas Graf * textsearch_register - register a textsearch module 1312de4ff7bSThomas Graf * @ops: operations lookup table 1322de4ff7bSThomas Graf * 1332de4ff7bSThomas Graf * This function must be called by textsearch modules to announce 1342de4ff7bSThomas Graf * their presence. The specified &@ops must have %name set to a 1352de4ff7bSThomas Graf * unique identifier and the callbacks find(), init(), get_pattern(), 1362de4ff7bSThomas Graf * and get_pattern_len() must be implemented. 1372de4ff7bSThomas Graf * 1382de4ff7bSThomas Graf * Returns 0 or -EEXISTS if another module has already registered 1392de4ff7bSThomas Graf * with same name. 1402de4ff7bSThomas Graf */ 1412de4ff7bSThomas Graf int textsearch_register(struct ts_ops *ops) 1422de4ff7bSThomas Graf { 1432de4ff7bSThomas Graf int err = -EEXIST; 1442de4ff7bSThomas Graf struct ts_ops *o; 1452de4ff7bSThomas Graf 1462de4ff7bSThomas Graf if (ops->name == NULL || ops->find == NULL || ops->init == NULL || 1472de4ff7bSThomas Graf ops->get_pattern == NULL || ops->get_pattern_len == NULL) 1482de4ff7bSThomas Graf return -EINVAL; 1492de4ff7bSThomas Graf 1502de4ff7bSThomas Graf spin_lock(&ts_mod_lock); 1512de4ff7bSThomas Graf list_for_each_entry(o, &ts_ops, list) { 1522de4ff7bSThomas Graf if (!strcmp(ops->name, o->name)) 1532de4ff7bSThomas Graf goto errout; 1542de4ff7bSThomas Graf } 1552de4ff7bSThomas Graf 1562de4ff7bSThomas Graf list_add_tail_rcu(&ops->list, &ts_ops); 1572de4ff7bSThomas Graf err = 0; 1582de4ff7bSThomas Graf errout: 1592de4ff7bSThomas Graf spin_unlock(&ts_mod_lock); 1602de4ff7bSThomas Graf return err; 1612de4ff7bSThomas Graf } 162ce643a30SFabian Frederick EXPORT_SYMBOL(textsearch_register); 1632de4ff7bSThomas Graf 1642de4ff7bSThomas Graf /** 1652de4ff7bSThomas Graf * textsearch_unregister - unregister a textsearch module 1662de4ff7bSThomas Graf * @ops: operations lookup table 1672de4ff7bSThomas Graf * 1682de4ff7bSThomas Graf * This function must be called by textsearch modules to announce 1692de4ff7bSThomas Graf * their disappearance for examples when the module gets unloaded. 1702de4ff7bSThomas Graf * The &ops parameter must be the same as the one during the 1712de4ff7bSThomas Graf * registration. 1722de4ff7bSThomas Graf * 1732de4ff7bSThomas Graf * Returns 0 on success or -ENOENT if no matching textsearch 1742de4ff7bSThomas Graf * registration was found. 1752de4ff7bSThomas Graf */ 1762de4ff7bSThomas Graf int textsearch_unregister(struct ts_ops *ops) 1772de4ff7bSThomas Graf { 1782de4ff7bSThomas Graf int err = 0; 1792de4ff7bSThomas Graf struct ts_ops *o; 1802de4ff7bSThomas Graf 1812de4ff7bSThomas Graf spin_lock(&ts_mod_lock); 1822de4ff7bSThomas Graf list_for_each_entry(o, &ts_ops, list) { 1832de4ff7bSThomas Graf if (o == ops) { 1842de4ff7bSThomas Graf list_del_rcu(&o->list); 1852de4ff7bSThomas Graf goto out; 1862de4ff7bSThomas Graf } 1872de4ff7bSThomas Graf } 1882de4ff7bSThomas Graf 1892de4ff7bSThomas Graf err = -ENOENT; 1902de4ff7bSThomas Graf out: 1912de4ff7bSThomas Graf spin_unlock(&ts_mod_lock); 1922de4ff7bSThomas Graf return err; 1932de4ff7bSThomas Graf } 194ce643a30SFabian Frederick EXPORT_SYMBOL(textsearch_unregister); 1952de4ff7bSThomas Graf 1962de4ff7bSThomas Graf struct ts_linear_state 1972de4ff7bSThomas Graf { 1982de4ff7bSThomas Graf unsigned int len; 1992de4ff7bSThomas Graf const void *data; 2002de4ff7bSThomas Graf }; 2012de4ff7bSThomas Graf 2022de4ff7bSThomas Graf static unsigned int get_linear_data(unsigned int consumed, const u8 **dst, 2032de4ff7bSThomas Graf struct ts_config *conf, 2042de4ff7bSThomas Graf struct ts_state *state) 2052de4ff7bSThomas Graf { 2062de4ff7bSThomas Graf struct ts_linear_state *st = (struct ts_linear_state *) state->cb; 2072de4ff7bSThomas Graf 2082de4ff7bSThomas Graf if (likely(consumed < st->len)) { 2092de4ff7bSThomas Graf *dst = st->data + consumed; 2102de4ff7bSThomas Graf return st->len - consumed; 2112de4ff7bSThomas Graf } 2122de4ff7bSThomas Graf 2132de4ff7bSThomas Graf return 0; 2142de4ff7bSThomas Graf } 2152de4ff7bSThomas Graf 2162de4ff7bSThomas Graf /** 2172de4ff7bSThomas Graf * textsearch_find_continuous - search a pattern in continuous/linear data 2182de4ff7bSThomas Graf * @conf: search configuration 2192de4ff7bSThomas Graf * @state: search state 2202de4ff7bSThomas Graf * @data: data to search in 2212de4ff7bSThomas Graf * @len: length of data 2222de4ff7bSThomas Graf * 2232de4ff7bSThomas Graf * A simplified version of textsearch_find() for continuous/linear data. 2242de4ff7bSThomas Graf * Call textsearch_next() to retrieve subsequent matches. 2252de4ff7bSThomas Graf * 2262de4ff7bSThomas Graf * Returns the position of first occurrence of the pattern or 22772fd4a35SRobert P. J. Day * %UINT_MAX if no occurrence was found. 2282de4ff7bSThomas Graf */ 2292de4ff7bSThomas Graf unsigned int textsearch_find_continuous(struct ts_config *conf, 2302de4ff7bSThomas Graf struct ts_state *state, 2312de4ff7bSThomas Graf const void *data, unsigned int len) 2322de4ff7bSThomas Graf { 2332de4ff7bSThomas Graf struct ts_linear_state *st = (struct ts_linear_state *) state->cb; 2342de4ff7bSThomas Graf 2352de4ff7bSThomas Graf conf->get_next_block = get_linear_data; 2362de4ff7bSThomas Graf st->data = data; 2372de4ff7bSThomas Graf st->len = len; 2382de4ff7bSThomas Graf 2392de4ff7bSThomas Graf return textsearch_find(conf, state); 2402de4ff7bSThomas Graf } 241ce643a30SFabian Frederick EXPORT_SYMBOL(textsearch_find_continuous); 2422de4ff7bSThomas Graf 2432de4ff7bSThomas Graf /** 2442de4ff7bSThomas Graf * textsearch_prepare - Prepare a search 2452de4ff7bSThomas Graf * @algo: name of search algorithm 2462de4ff7bSThomas Graf * @pattern: pattern data 2472de4ff7bSThomas Graf * @len: length of pattern 2482de4ff7bSThomas Graf * @gfp_mask: allocation mask 2492de4ff7bSThomas Graf * @flags: search flags 2502de4ff7bSThomas Graf * 2512de4ff7bSThomas Graf * Looks up the search algorithm module and creates a new textsearch 252*fec22908SRaphael Silva * configuration for the specified pattern. 2532de4ff7bSThomas Graf * 2542de4ff7bSThomas Graf * Note: The format of the pattern may not be compatible between 2552de4ff7bSThomas Graf * the various search algorithms. 2562de4ff7bSThomas Graf * 2572de4ff7bSThomas Graf * Returns a new textsearch configuration according to the specified 258e03ba84aSPablo Neira Ayuso * parameters or a ERR_PTR(). If a zero length pattern is passed, this 259e03ba84aSPablo Neira Ayuso * function returns EINVAL. 2602de4ff7bSThomas Graf */ 2612de4ff7bSThomas Graf struct ts_config *textsearch_prepare(const char *algo, const void *pattern, 262fd4f2df2SAl Viro unsigned int len, gfp_t gfp_mask, int flags) 2632de4ff7bSThomas Graf { 2642de4ff7bSThomas Graf int err = -ENOENT; 2652de4ff7bSThomas Graf struct ts_config *conf; 2662de4ff7bSThomas Graf struct ts_ops *ops; 2672de4ff7bSThomas Graf 268e03ba84aSPablo Neira Ayuso if (len == 0) 269e03ba84aSPablo Neira Ayuso return ERR_PTR(-EINVAL); 270e03ba84aSPablo Neira Ayuso 2712de4ff7bSThomas Graf ops = lookup_ts_algo(algo); 272a00caa1fSJohannes Berg #ifdef CONFIG_MODULES 2732de4ff7bSThomas Graf /* 2742de4ff7bSThomas Graf * Why not always autoload you may ask. Some users are 2752de4ff7bSThomas Graf * in a situation where requesting a module may deadlock, 2762de4ff7bSThomas Graf * especially when the module is located on a NFS mount. 2772de4ff7bSThomas Graf */ 2782de4ff7bSThomas Graf if (ops == NULL && flags & TS_AUTOLOAD) { 2792de4ff7bSThomas Graf request_module("ts_%s", algo); 2802de4ff7bSThomas Graf ops = lookup_ts_algo(algo); 2812de4ff7bSThomas Graf } 2822de4ff7bSThomas Graf #endif 2832de4ff7bSThomas Graf 2842de4ff7bSThomas Graf if (ops == NULL) 2852de4ff7bSThomas Graf goto errout; 2862de4ff7bSThomas Graf 287b9c79678SJoonwoo Park conf = ops->init(pattern, len, gfp_mask, flags); 2882de4ff7bSThomas Graf if (IS_ERR(conf)) { 2892de4ff7bSThomas Graf err = PTR_ERR(conf); 2902de4ff7bSThomas Graf goto errout; 2912de4ff7bSThomas Graf } 2922de4ff7bSThomas Graf 2932de4ff7bSThomas Graf conf->ops = ops; 2942de4ff7bSThomas Graf return conf; 2952de4ff7bSThomas Graf 2962de4ff7bSThomas Graf errout: 2972de4ff7bSThomas Graf if (ops) 2982de4ff7bSThomas Graf module_put(ops->owner); 2992de4ff7bSThomas Graf 3002de4ff7bSThomas Graf return ERR_PTR(err); 3012de4ff7bSThomas Graf } 302ce643a30SFabian Frederick EXPORT_SYMBOL(textsearch_prepare); 3032de4ff7bSThomas Graf 3042de4ff7bSThomas Graf /** 3052de4ff7bSThomas Graf * textsearch_destroy - destroy a search configuration 3062de4ff7bSThomas Graf * @conf: search configuration 3072de4ff7bSThomas Graf * 3082de4ff7bSThomas Graf * Releases all references of the configuration and frees 3092de4ff7bSThomas Graf * up the memory. 3102de4ff7bSThomas Graf */ 3112de4ff7bSThomas Graf void textsearch_destroy(struct ts_config *conf) 3122de4ff7bSThomas Graf { 3132de4ff7bSThomas Graf if (conf->ops) { 3142de4ff7bSThomas Graf if (conf->ops->destroy) 3152de4ff7bSThomas Graf conf->ops->destroy(conf); 3162de4ff7bSThomas Graf module_put(conf->ops->owner); 3172de4ff7bSThomas Graf } 3182de4ff7bSThomas Graf 3192de4ff7bSThomas Graf kfree(conf); 3202de4ff7bSThomas Graf } 3212de4ff7bSThomas Graf EXPORT_SYMBOL(textsearch_destroy); 322