12de4ff7bSThomas Graf /* 22de4ff7bSThomas Graf * lib/textsearch.c Generic text search interface 32de4ff7bSThomas Graf * 42de4ff7bSThomas Graf * This program is free software; you can redistribute it and/or 52de4ff7bSThomas Graf * modify it under the terms of the GNU General Public License 62de4ff7bSThomas Graf * as published by the Free Software Foundation; either version 72de4ff7bSThomas Graf * 2 of the License, or (at your option) any later version. 82de4ff7bSThomas Graf * 92de4ff7bSThomas Graf * Authors: Thomas Graf <tgraf@suug.ch> 10e03ba84aSPablo Neira Ayuso * Pablo Neira Ayuso <pablo@netfilter.org> 112de4ff7bSThomas Graf * 122de4ff7bSThomas Graf * ========================================================================== 13*5968a70dSRandy Dunlap */ 14*5968a70dSRandy Dunlap 15*5968a70dSRandy Dunlap /** 16*5968a70dSRandy Dunlap * DOC: ts_intro 172de4ff7bSThomas Graf * INTRODUCTION 182de4ff7bSThomas Graf * 19de0368d5SJesper Dangaard Brouer * The textsearch infrastructure provides text searching facilities for 202de4ff7bSThomas Graf * both linear and non-linear data. Individual search algorithms are 212de4ff7bSThomas Graf * implemented in modules and chosen by the user. 222de4ff7bSThomas Graf * 232de4ff7bSThomas Graf * ARCHITECTURE 242de4ff7bSThomas Graf * 25*5968a70dSRandy Dunlap * .. code-block:: none 26*5968a70dSRandy Dunlap * 272de4ff7bSThomas Graf * User 282de4ff7bSThomas Graf * +----------------+ 292de4ff7bSThomas Graf * | finish()|<--------------(6)-----------------+ 302de4ff7bSThomas Graf * |get_next_block()|<--------------(5)---------------+ | 312de4ff7bSThomas Graf * | | Algorithm | | 322de4ff7bSThomas Graf * | | +------------------------------+ 332de4ff7bSThomas Graf * | | | init() find() destroy() | 342de4ff7bSThomas Graf * | | +------------------------------+ 352de4ff7bSThomas Graf * | | Core API ^ ^ ^ 362de4ff7bSThomas Graf * | | +---------------+ (2) (4) (8) 372de4ff7bSThomas Graf * | (1)|----->| prepare() |---+ | | 382de4ff7bSThomas Graf * | (3)|----->| find()/next() |-----------+ | 392de4ff7bSThomas Graf * | (7)|----->| destroy() |----------------------+ 402de4ff7bSThomas Graf * +----------------+ +---------------+ 412de4ff7bSThomas Graf * 42*5968a70dSRandy Dunlap * (1) User configures a search by calling textsearch_prepare() specifying 43*5968a70dSRandy Dunlap * the search parameters such as the pattern and algorithm name. 442de4ff7bSThomas Graf * (2) Core requests the algorithm to allocate and initialize a search 452de4ff7bSThomas Graf * configuration according to the specified parameters. 46*5968a70dSRandy Dunlap * (3) User starts the search(es) by calling textsearch_find() or 47*5968a70dSRandy Dunlap * textsearch_next() to fetch subsequent occurrences. A state variable 48*5968a70dSRandy Dunlap * is provided to the algorithm to store persistent variables. 492de4ff7bSThomas Graf * (4) Core eventually resets the search offset and forwards the find() 502de4ff7bSThomas Graf * request to the algorithm. 51de0368d5SJesper Dangaard Brouer * (5) Algorithm calls get_next_block() provided by the user continuously 522de4ff7bSThomas Graf * to fetch the data to be searched in block by block. 532de4ff7bSThomas Graf * (6) Algorithm invokes finish() after the last call to get_next_block 542de4ff7bSThomas Graf * to clean up any leftovers from get_next_block. (Optional) 55*5968a70dSRandy Dunlap * (7) User destroys the configuration by calling textsearch_destroy(). 562de4ff7bSThomas Graf * (8) Core notifies the algorithm to destroy algorithm specific 572de4ff7bSThomas Graf * allocations. (Optional) 582de4ff7bSThomas Graf * 592de4ff7bSThomas Graf * USAGE 602de4ff7bSThomas Graf * 612de4ff7bSThomas Graf * Before a search can be performed, a configuration must be created 62b9c79678SJoonwoo Park * by calling textsearch_prepare() specifying the searching algorithm, 63b9c79678SJoonwoo Park * the pattern to look for and flags. As a flag, you can set TS_IGNORECASE 64b9c79678SJoonwoo Park * to perform case insensitive matching. But it might slow down 65b9c79678SJoonwoo Park * performance of algorithm, so you should use it at own your risk. 66de0368d5SJesper Dangaard Brouer * The returned configuration may then be used for an arbitrary 67b9c79678SJoonwoo Park * amount of times and even in parallel as long as a separate struct 68b9c79678SJoonwoo Park * ts_state variable is provided to every instance. 692de4ff7bSThomas Graf * 70*5968a70dSRandy Dunlap * The actual search is performed by either calling 71*5968a70dSRandy Dunlap * textsearch_find_continuous() for linear data or by providing 72*5968a70dSRandy Dunlap * an own get_next_block() implementation and 73*5968a70dSRandy Dunlap * calling textsearch_find(). Both functions return 74de0368d5SJesper Dangaard Brouer * the position of the first occurrence of the pattern or UINT_MAX if 75de0368d5SJesper Dangaard Brouer * no match was found. Subsequent occurrences can be found by calling 762de4ff7bSThomas Graf * textsearch_next() regardless of the linearity of the data. 772de4ff7bSThomas Graf * 782de4ff7bSThomas Graf * Once you're done using a configuration it must be given back via 792de4ff7bSThomas Graf * textsearch_destroy. 802de4ff7bSThomas Graf * 81*5968a70dSRandy Dunlap * EXAMPLE:: 822de4ff7bSThomas Graf * 832de4ff7bSThomas Graf * int pos; 842de4ff7bSThomas Graf * struct ts_config *conf; 852de4ff7bSThomas Graf * struct ts_state state; 862de4ff7bSThomas Graf * const char *pattern = "chicken"; 872de4ff7bSThomas Graf * const char *example = "We dance the funky chicken"; 882de4ff7bSThomas Graf * 892de4ff7bSThomas Graf * conf = textsearch_prepare("kmp", pattern, strlen(pattern), 902de4ff7bSThomas Graf * GFP_KERNEL, TS_AUTOLOAD); 912de4ff7bSThomas Graf * if (IS_ERR(conf)) { 922de4ff7bSThomas Graf * err = PTR_ERR(conf); 932de4ff7bSThomas Graf * goto errout; 942de4ff7bSThomas Graf * } 952de4ff7bSThomas Graf * 96*5968a70dSRandy Dunlap * pos = textsearch_find_continuous(conf, \&state, example, strlen(example)); 972de4ff7bSThomas Graf * if (pos != UINT_MAX) 98*5968a70dSRandy Dunlap * panic("Oh my god, dancing chickens at \%d\n", pos); 992de4ff7bSThomas Graf * 1002de4ff7bSThomas Graf * textsearch_destroy(conf); 1012de4ff7bSThomas Graf */ 102*5968a70dSRandy Dunlap /* ========================================================================== */ 1032de4ff7bSThomas Graf 1042de4ff7bSThomas Graf #include <linux/module.h> 1052de4ff7bSThomas Graf #include <linux/types.h> 1062de4ff7bSThomas Graf #include <linux/string.h> 1072de4ff7bSThomas Graf #include <linux/init.h> 10882524746SFranck Bui-Huu #include <linux/rculist.h> 1092de4ff7bSThomas Graf #include <linux/rcupdate.h> 1102de4ff7bSThomas Graf #include <linux/err.h> 1112de4ff7bSThomas Graf #include <linux/textsearch.h> 1125a0e3ad6STejun Heo #include <linux/slab.h> 1132de4ff7bSThomas Graf 1142de4ff7bSThomas Graf static LIST_HEAD(ts_ops); 1152de4ff7bSThomas Graf static DEFINE_SPINLOCK(ts_mod_lock); 1162de4ff7bSThomas Graf 1172de4ff7bSThomas Graf static inline struct ts_ops *lookup_ts_algo(const char *name) 1182de4ff7bSThomas Graf { 1192de4ff7bSThomas Graf struct ts_ops *o; 1202de4ff7bSThomas Graf 1212de4ff7bSThomas Graf rcu_read_lock(); 1222de4ff7bSThomas Graf list_for_each_entry_rcu(o, &ts_ops, list) { 1232de4ff7bSThomas Graf if (!strcmp(name, o->name)) { 1242de4ff7bSThomas Graf if (!try_module_get(o->owner)) 1252de4ff7bSThomas Graf o = NULL; 1262de4ff7bSThomas Graf rcu_read_unlock(); 1272de4ff7bSThomas Graf return o; 1282de4ff7bSThomas Graf } 1292de4ff7bSThomas Graf } 1302de4ff7bSThomas Graf rcu_read_unlock(); 1312de4ff7bSThomas Graf 1322de4ff7bSThomas Graf return NULL; 1332de4ff7bSThomas Graf } 1342de4ff7bSThomas Graf 1352de4ff7bSThomas Graf /** 1362de4ff7bSThomas Graf * textsearch_register - register a textsearch module 1372de4ff7bSThomas Graf * @ops: operations lookup table 1382de4ff7bSThomas Graf * 1392de4ff7bSThomas Graf * This function must be called by textsearch modules to announce 1402de4ff7bSThomas Graf * their presence. The specified &@ops must have %name set to a 1412de4ff7bSThomas Graf * unique identifier and the callbacks find(), init(), get_pattern(), 1422de4ff7bSThomas Graf * and get_pattern_len() must be implemented. 1432de4ff7bSThomas Graf * 1442de4ff7bSThomas Graf * Returns 0 or -EEXISTS if another module has already registered 1452de4ff7bSThomas Graf * with same name. 1462de4ff7bSThomas Graf */ 1472de4ff7bSThomas Graf int textsearch_register(struct ts_ops *ops) 1482de4ff7bSThomas Graf { 1492de4ff7bSThomas Graf int err = -EEXIST; 1502de4ff7bSThomas Graf struct ts_ops *o; 1512de4ff7bSThomas Graf 1522de4ff7bSThomas Graf if (ops->name == NULL || ops->find == NULL || ops->init == NULL || 1532de4ff7bSThomas Graf ops->get_pattern == NULL || ops->get_pattern_len == NULL) 1542de4ff7bSThomas Graf return -EINVAL; 1552de4ff7bSThomas Graf 1562de4ff7bSThomas Graf spin_lock(&ts_mod_lock); 1572de4ff7bSThomas Graf list_for_each_entry(o, &ts_ops, list) { 1582de4ff7bSThomas Graf if (!strcmp(ops->name, o->name)) 1592de4ff7bSThomas Graf goto errout; 1602de4ff7bSThomas Graf } 1612de4ff7bSThomas Graf 1622de4ff7bSThomas Graf list_add_tail_rcu(&ops->list, &ts_ops); 1632de4ff7bSThomas Graf err = 0; 1642de4ff7bSThomas Graf errout: 1652de4ff7bSThomas Graf spin_unlock(&ts_mod_lock); 1662de4ff7bSThomas Graf return err; 1672de4ff7bSThomas Graf } 168ce643a30SFabian Frederick EXPORT_SYMBOL(textsearch_register); 1692de4ff7bSThomas Graf 1702de4ff7bSThomas Graf /** 1712de4ff7bSThomas Graf * textsearch_unregister - unregister a textsearch module 1722de4ff7bSThomas Graf * @ops: operations lookup table 1732de4ff7bSThomas Graf * 1742de4ff7bSThomas Graf * This function must be called by textsearch modules to announce 1752de4ff7bSThomas Graf * their disappearance for examples when the module gets unloaded. 1762de4ff7bSThomas Graf * The &ops parameter must be the same as the one during the 1772de4ff7bSThomas Graf * registration. 1782de4ff7bSThomas Graf * 1792de4ff7bSThomas Graf * Returns 0 on success or -ENOENT if no matching textsearch 1802de4ff7bSThomas Graf * registration was found. 1812de4ff7bSThomas Graf */ 1822de4ff7bSThomas Graf int textsearch_unregister(struct ts_ops *ops) 1832de4ff7bSThomas Graf { 1842de4ff7bSThomas Graf int err = 0; 1852de4ff7bSThomas Graf struct ts_ops *o; 1862de4ff7bSThomas Graf 1872de4ff7bSThomas Graf spin_lock(&ts_mod_lock); 1882de4ff7bSThomas Graf list_for_each_entry(o, &ts_ops, list) { 1892de4ff7bSThomas Graf if (o == ops) { 1902de4ff7bSThomas Graf list_del_rcu(&o->list); 1912de4ff7bSThomas Graf goto out; 1922de4ff7bSThomas Graf } 1932de4ff7bSThomas Graf } 1942de4ff7bSThomas Graf 1952de4ff7bSThomas Graf err = -ENOENT; 1962de4ff7bSThomas Graf out: 1972de4ff7bSThomas Graf spin_unlock(&ts_mod_lock); 1982de4ff7bSThomas Graf return err; 1992de4ff7bSThomas Graf } 200ce643a30SFabian Frederick EXPORT_SYMBOL(textsearch_unregister); 2012de4ff7bSThomas Graf 2022de4ff7bSThomas Graf struct ts_linear_state 2032de4ff7bSThomas Graf { 2042de4ff7bSThomas Graf unsigned int len; 2052de4ff7bSThomas Graf const void *data; 2062de4ff7bSThomas Graf }; 2072de4ff7bSThomas Graf 2082de4ff7bSThomas Graf static unsigned int get_linear_data(unsigned int consumed, const u8 **dst, 2092de4ff7bSThomas Graf struct ts_config *conf, 2102de4ff7bSThomas Graf struct ts_state *state) 2112de4ff7bSThomas Graf { 2122de4ff7bSThomas Graf struct ts_linear_state *st = (struct ts_linear_state *) state->cb; 2132de4ff7bSThomas Graf 2142de4ff7bSThomas Graf if (likely(consumed < st->len)) { 2152de4ff7bSThomas Graf *dst = st->data + consumed; 2162de4ff7bSThomas Graf return st->len - consumed; 2172de4ff7bSThomas Graf } 2182de4ff7bSThomas Graf 2192de4ff7bSThomas Graf return 0; 2202de4ff7bSThomas Graf } 2212de4ff7bSThomas Graf 2222de4ff7bSThomas Graf /** 2232de4ff7bSThomas Graf * textsearch_find_continuous - search a pattern in continuous/linear data 2242de4ff7bSThomas Graf * @conf: search configuration 2252de4ff7bSThomas Graf * @state: search state 2262de4ff7bSThomas Graf * @data: data to search in 2272de4ff7bSThomas Graf * @len: length of data 2282de4ff7bSThomas Graf * 2292de4ff7bSThomas Graf * A simplified version of textsearch_find() for continuous/linear data. 2302de4ff7bSThomas Graf * Call textsearch_next() to retrieve subsequent matches. 2312de4ff7bSThomas Graf * 2322de4ff7bSThomas Graf * Returns the position of first occurrence of the pattern or 23372fd4a35SRobert P. J. Day * %UINT_MAX if no occurrence was found. 2342de4ff7bSThomas Graf */ 2352de4ff7bSThomas Graf unsigned int textsearch_find_continuous(struct ts_config *conf, 2362de4ff7bSThomas Graf struct ts_state *state, 2372de4ff7bSThomas Graf const void *data, unsigned int len) 2382de4ff7bSThomas Graf { 2392de4ff7bSThomas Graf struct ts_linear_state *st = (struct ts_linear_state *) state->cb; 2402de4ff7bSThomas Graf 2412de4ff7bSThomas Graf conf->get_next_block = get_linear_data; 2422de4ff7bSThomas Graf st->data = data; 2432de4ff7bSThomas Graf st->len = len; 2442de4ff7bSThomas Graf 2452de4ff7bSThomas Graf return textsearch_find(conf, state); 2462de4ff7bSThomas Graf } 247ce643a30SFabian Frederick EXPORT_SYMBOL(textsearch_find_continuous); 2482de4ff7bSThomas Graf 2492de4ff7bSThomas Graf /** 2502de4ff7bSThomas Graf * textsearch_prepare - Prepare a search 2512de4ff7bSThomas Graf * @algo: name of search algorithm 2522de4ff7bSThomas Graf * @pattern: pattern data 2532de4ff7bSThomas Graf * @len: length of pattern 2542de4ff7bSThomas Graf * @gfp_mask: allocation mask 2552de4ff7bSThomas Graf * @flags: search flags 2562de4ff7bSThomas Graf * 2572de4ff7bSThomas Graf * Looks up the search algorithm module and creates a new textsearch 258fec22908SRaphael Silva * configuration for the specified pattern. 2592de4ff7bSThomas Graf * 2602de4ff7bSThomas Graf * Note: The format of the pattern may not be compatible between 2612de4ff7bSThomas Graf * the various search algorithms. 2622de4ff7bSThomas Graf * 2632de4ff7bSThomas Graf * Returns a new textsearch configuration according to the specified 264e03ba84aSPablo Neira Ayuso * parameters or a ERR_PTR(). If a zero length pattern is passed, this 265e03ba84aSPablo Neira Ayuso * function returns EINVAL. 2662de4ff7bSThomas Graf */ 2672de4ff7bSThomas Graf struct ts_config *textsearch_prepare(const char *algo, const void *pattern, 268fd4f2df2SAl Viro unsigned int len, gfp_t gfp_mask, int flags) 2692de4ff7bSThomas Graf { 2702de4ff7bSThomas Graf int err = -ENOENT; 2712de4ff7bSThomas Graf struct ts_config *conf; 2722de4ff7bSThomas Graf struct ts_ops *ops; 2732de4ff7bSThomas Graf 274e03ba84aSPablo Neira Ayuso if (len == 0) 275e03ba84aSPablo Neira Ayuso return ERR_PTR(-EINVAL); 276e03ba84aSPablo Neira Ayuso 2772de4ff7bSThomas Graf ops = lookup_ts_algo(algo); 278a00caa1fSJohannes Berg #ifdef CONFIG_MODULES 2792de4ff7bSThomas Graf /* 2802de4ff7bSThomas Graf * Why not always autoload you may ask. Some users are 2812de4ff7bSThomas Graf * in a situation where requesting a module may deadlock, 2822de4ff7bSThomas Graf * especially when the module is located on a NFS mount. 2832de4ff7bSThomas Graf */ 2842de4ff7bSThomas Graf if (ops == NULL && flags & TS_AUTOLOAD) { 2852de4ff7bSThomas Graf request_module("ts_%s", algo); 2862de4ff7bSThomas Graf ops = lookup_ts_algo(algo); 2872de4ff7bSThomas Graf } 2882de4ff7bSThomas Graf #endif 2892de4ff7bSThomas Graf 2902de4ff7bSThomas Graf if (ops == NULL) 2912de4ff7bSThomas Graf goto errout; 2922de4ff7bSThomas Graf 293b9c79678SJoonwoo Park conf = ops->init(pattern, len, gfp_mask, flags); 2942de4ff7bSThomas Graf if (IS_ERR(conf)) { 2952de4ff7bSThomas Graf err = PTR_ERR(conf); 2962de4ff7bSThomas Graf goto errout; 2972de4ff7bSThomas Graf } 2982de4ff7bSThomas Graf 2992de4ff7bSThomas Graf conf->ops = ops; 3002de4ff7bSThomas Graf return conf; 3012de4ff7bSThomas Graf 3022de4ff7bSThomas Graf errout: 3032de4ff7bSThomas Graf if (ops) 3042de4ff7bSThomas Graf module_put(ops->owner); 3052de4ff7bSThomas Graf 3062de4ff7bSThomas Graf return ERR_PTR(err); 3072de4ff7bSThomas Graf } 308ce643a30SFabian Frederick EXPORT_SYMBOL(textsearch_prepare); 3092de4ff7bSThomas Graf 3102de4ff7bSThomas Graf /** 3112de4ff7bSThomas Graf * textsearch_destroy - destroy a search configuration 3122de4ff7bSThomas Graf * @conf: search configuration 3132de4ff7bSThomas Graf * 3142de4ff7bSThomas Graf * Releases all references of the configuration and frees 3152de4ff7bSThomas Graf * up the memory. 3162de4ff7bSThomas Graf */ 3172de4ff7bSThomas Graf void textsearch_destroy(struct ts_config *conf) 3182de4ff7bSThomas Graf { 3192de4ff7bSThomas Graf if (conf->ops) { 3202de4ff7bSThomas Graf if (conf->ops->destroy) 3212de4ff7bSThomas Graf conf->ops->destroy(conf); 3222de4ff7bSThomas Graf module_put(conf->ops->owner); 3232de4ff7bSThomas Graf } 3242de4ff7bSThomas Graf 3252de4ff7bSThomas Graf kfree(conf); 3262de4ff7bSThomas Graf } 3272de4ff7bSThomas Graf EXPORT_SYMBOL(textsearch_destroy); 328