1*2874c5fdSThomas Gleixner // SPDX-License-Identifier: GPL-2.0-or-later 22de4ff7bSThomas Graf /* 32de4ff7bSThomas Graf * lib/textsearch.c Generic text search interface 42de4ff7bSThomas Graf * 52de4ff7bSThomas Graf * Authors: Thomas Graf <tgraf@suug.ch> 6e03ba84aSPablo Neira Ayuso * Pablo Neira Ayuso <pablo@netfilter.org> 72de4ff7bSThomas Graf * 82de4ff7bSThomas Graf * ========================================================================== 95968a70dSRandy Dunlap */ 105968a70dSRandy Dunlap 115968a70dSRandy Dunlap /** 125968a70dSRandy Dunlap * DOC: ts_intro 132de4ff7bSThomas Graf * INTRODUCTION 142de4ff7bSThomas Graf * 15de0368d5SJesper Dangaard Brouer * The textsearch infrastructure provides text searching facilities for 162de4ff7bSThomas Graf * both linear and non-linear data. Individual search algorithms are 172de4ff7bSThomas Graf * implemented in modules and chosen by the user. 182de4ff7bSThomas Graf * 192de4ff7bSThomas Graf * ARCHITECTURE 202de4ff7bSThomas Graf * 215968a70dSRandy Dunlap * .. code-block:: none 225968a70dSRandy Dunlap * 232de4ff7bSThomas Graf * User 242de4ff7bSThomas Graf * +----------------+ 252de4ff7bSThomas Graf * | finish()|<--------------(6)-----------------+ 262de4ff7bSThomas Graf * |get_next_block()|<--------------(5)---------------+ | 272de4ff7bSThomas Graf * | | Algorithm | | 282de4ff7bSThomas Graf * | | +------------------------------+ 292de4ff7bSThomas Graf * | | | init() find() destroy() | 302de4ff7bSThomas Graf * | | +------------------------------+ 312de4ff7bSThomas Graf * | | Core API ^ ^ ^ 322de4ff7bSThomas Graf * | | +---------------+ (2) (4) (8) 332de4ff7bSThomas Graf * | (1)|----->| prepare() |---+ | | 342de4ff7bSThomas Graf * | (3)|----->| find()/next() |-----------+ | 352de4ff7bSThomas Graf * | (7)|----->| destroy() |----------------------+ 362de4ff7bSThomas Graf * +----------------+ +---------------+ 372de4ff7bSThomas Graf * 385968a70dSRandy Dunlap * (1) User configures a search by calling textsearch_prepare() specifying 395968a70dSRandy Dunlap * the search parameters such as the pattern and algorithm name. 402de4ff7bSThomas Graf * (2) Core requests the algorithm to allocate and initialize a search 412de4ff7bSThomas Graf * configuration according to the specified parameters. 425968a70dSRandy Dunlap * (3) User starts the search(es) by calling textsearch_find() or 435968a70dSRandy Dunlap * textsearch_next() to fetch subsequent occurrences. A state variable 445968a70dSRandy Dunlap * is provided to the algorithm to store persistent variables. 452de4ff7bSThomas Graf * (4) Core eventually resets the search offset and forwards the find() 462de4ff7bSThomas Graf * request to the algorithm. 47de0368d5SJesper Dangaard Brouer * (5) Algorithm calls get_next_block() provided by the user continuously 482de4ff7bSThomas Graf * to fetch the data to be searched in block by block. 492de4ff7bSThomas Graf * (6) Algorithm invokes finish() after the last call to get_next_block 502de4ff7bSThomas Graf * to clean up any leftovers from get_next_block. (Optional) 515968a70dSRandy Dunlap * (7) User destroys the configuration by calling textsearch_destroy(). 522de4ff7bSThomas Graf * (8) Core notifies the algorithm to destroy algorithm specific 532de4ff7bSThomas Graf * allocations. (Optional) 542de4ff7bSThomas Graf * 552de4ff7bSThomas Graf * USAGE 562de4ff7bSThomas Graf * 572de4ff7bSThomas Graf * Before a search can be performed, a configuration must be created 58b9c79678SJoonwoo Park * by calling textsearch_prepare() specifying the searching algorithm, 59b9c79678SJoonwoo Park * the pattern to look for and flags. As a flag, you can set TS_IGNORECASE 60b9c79678SJoonwoo Park * to perform case insensitive matching. But it might slow down 61b9c79678SJoonwoo Park * performance of algorithm, so you should use it at own your risk. 62de0368d5SJesper Dangaard Brouer * The returned configuration may then be used for an arbitrary 63b9c79678SJoonwoo Park * amount of times and even in parallel as long as a separate struct 64b9c79678SJoonwoo Park * ts_state variable is provided to every instance. 652de4ff7bSThomas Graf * 665968a70dSRandy Dunlap * The actual search is performed by either calling 675968a70dSRandy Dunlap * textsearch_find_continuous() for linear data or by providing 685968a70dSRandy Dunlap * an own get_next_block() implementation and 695968a70dSRandy Dunlap * calling textsearch_find(). Both functions return 70de0368d5SJesper Dangaard Brouer * the position of the first occurrence of the pattern or UINT_MAX if 71de0368d5SJesper Dangaard Brouer * no match was found. Subsequent occurrences can be found by calling 722de4ff7bSThomas Graf * textsearch_next() regardless of the linearity of the data. 732de4ff7bSThomas Graf * 742de4ff7bSThomas Graf * Once you're done using a configuration it must be given back via 752de4ff7bSThomas Graf * textsearch_destroy. 762de4ff7bSThomas Graf * 775968a70dSRandy Dunlap * EXAMPLE:: 782de4ff7bSThomas Graf * 792de4ff7bSThomas Graf * int pos; 802de4ff7bSThomas Graf * struct ts_config *conf; 812de4ff7bSThomas Graf * struct ts_state state; 822de4ff7bSThomas Graf * const char *pattern = "chicken"; 832de4ff7bSThomas Graf * const char *example = "We dance the funky chicken"; 842de4ff7bSThomas Graf * 852de4ff7bSThomas Graf * conf = textsearch_prepare("kmp", pattern, strlen(pattern), 862de4ff7bSThomas Graf * GFP_KERNEL, TS_AUTOLOAD); 872de4ff7bSThomas Graf * if (IS_ERR(conf)) { 882de4ff7bSThomas Graf * err = PTR_ERR(conf); 892de4ff7bSThomas Graf * goto errout; 902de4ff7bSThomas Graf * } 912de4ff7bSThomas Graf * 925968a70dSRandy Dunlap * pos = textsearch_find_continuous(conf, \&state, example, strlen(example)); 932de4ff7bSThomas Graf * if (pos != UINT_MAX) 945968a70dSRandy Dunlap * panic("Oh my god, dancing chickens at \%d\n", pos); 952de4ff7bSThomas Graf * 962de4ff7bSThomas Graf * textsearch_destroy(conf); 972de4ff7bSThomas Graf */ 985968a70dSRandy Dunlap /* ========================================================================== */ 992de4ff7bSThomas Graf 1002de4ff7bSThomas Graf #include <linux/module.h> 1012de4ff7bSThomas Graf #include <linux/types.h> 1022de4ff7bSThomas Graf #include <linux/string.h> 1032de4ff7bSThomas Graf #include <linux/init.h> 10482524746SFranck Bui-Huu #include <linux/rculist.h> 1052de4ff7bSThomas Graf #include <linux/rcupdate.h> 1062de4ff7bSThomas Graf #include <linux/err.h> 1072de4ff7bSThomas Graf #include <linux/textsearch.h> 1085a0e3ad6STejun Heo #include <linux/slab.h> 1092de4ff7bSThomas Graf 1102de4ff7bSThomas Graf static LIST_HEAD(ts_ops); 1112de4ff7bSThomas Graf static DEFINE_SPINLOCK(ts_mod_lock); 1122de4ff7bSThomas Graf 1132de4ff7bSThomas Graf static inline struct ts_ops *lookup_ts_algo(const char *name) 1142de4ff7bSThomas Graf { 1152de4ff7bSThomas Graf struct ts_ops *o; 1162de4ff7bSThomas Graf 1172de4ff7bSThomas Graf rcu_read_lock(); 1182de4ff7bSThomas Graf list_for_each_entry_rcu(o, &ts_ops, list) { 1192de4ff7bSThomas Graf if (!strcmp(name, o->name)) { 1202de4ff7bSThomas Graf if (!try_module_get(o->owner)) 1212de4ff7bSThomas Graf o = NULL; 1222de4ff7bSThomas Graf rcu_read_unlock(); 1232de4ff7bSThomas Graf return o; 1242de4ff7bSThomas Graf } 1252de4ff7bSThomas Graf } 1262de4ff7bSThomas Graf rcu_read_unlock(); 1272de4ff7bSThomas Graf 1282de4ff7bSThomas Graf return NULL; 1292de4ff7bSThomas Graf } 1302de4ff7bSThomas Graf 1312de4ff7bSThomas Graf /** 1322de4ff7bSThomas Graf * textsearch_register - register a textsearch module 1332de4ff7bSThomas Graf * @ops: operations lookup table 1342de4ff7bSThomas Graf * 1352de4ff7bSThomas Graf * This function must be called by textsearch modules to announce 1362de4ff7bSThomas Graf * their presence. The specified &@ops must have %name set to a 1372de4ff7bSThomas Graf * unique identifier and the callbacks find(), init(), get_pattern(), 1382de4ff7bSThomas Graf * and get_pattern_len() must be implemented. 1392de4ff7bSThomas Graf * 1402de4ff7bSThomas Graf * Returns 0 or -EEXISTS if another module has already registered 1412de4ff7bSThomas Graf * with same name. 1422de4ff7bSThomas Graf */ 1432de4ff7bSThomas Graf int textsearch_register(struct ts_ops *ops) 1442de4ff7bSThomas Graf { 1452de4ff7bSThomas Graf int err = -EEXIST; 1462de4ff7bSThomas Graf struct ts_ops *o; 1472de4ff7bSThomas Graf 1482de4ff7bSThomas Graf if (ops->name == NULL || ops->find == NULL || ops->init == NULL || 1492de4ff7bSThomas Graf ops->get_pattern == NULL || ops->get_pattern_len == NULL) 1502de4ff7bSThomas Graf return -EINVAL; 1512de4ff7bSThomas Graf 1522de4ff7bSThomas Graf spin_lock(&ts_mod_lock); 1532de4ff7bSThomas Graf list_for_each_entry(o, &ts_ops, list) { 1542de4ff7bSThomas Graf if (!strcmp(ops->name, o->name)) 1552de4ff7bSThomas Graf goto errout; 1562de4ff7bSThomas Graf } 1572de4ff7bSThomas Graf 1582de4ff7bSThomas Graf list_add_tail_rcu(&ops->list, &ts_ops); 1592de4ff7bSThomas Graf err = 0; 1602de4ff7bSThomas Graf errout: 1612de4ff7bSThomas Graf spin_unlock(&ts_mod_lock); 1622de4ff7bSThomas Graf return err; 1632de4ff7bSThomas Graf } 164ce643a30SFabian Frederick EXPORT_SYMBOL(textsearch_register); 1652de4ff7bSThomas Graf 1662de4ff7bSThomas Graf /** 1672de4ff7bSThomas Graf * textsearch_unregister - unregister a textsearch module 1682de4ff7bSThomas Graf * @ops: operations lookup table 1692de4ff7bSThomas Graf * 1702de4ff7bSThomas Graf * This function must be called by textsearch modules to announce 1712de4ff7bSThomas Graf * their disappearance for examples when the module gets unloaded. 1722de4ff7bSThomas Graf * The &ops parameter must be the same as the one during the 1732de4ff7bSThomas Graf * registration. 1742de4ff7bSThomas Graf * 1752de4ff7bSThomas Graf * Returns 0 on success or -ENOENT if no matching textsearch 1762de4ff7bSThomas Graf * registration was found. 1772de4ff7bSThomas Graf */ 1782de4ff7bSThomas Graf int textsearch_unregister(struct ts_ops *ops) 1792de4ff7bSThomas Graf { 1802de4ff7bSThomas Graf int err = 0; 1812de4ff7bSThomas Graf struct ts_ops *o; 1822de4ff7bSThomas Graf 1832de4ff7bSThomas Graf spin_lock(&ts_mod_lock); 1842de4ff7bSThomas Graf list_for_each_entry(o, &ts_ops, list) { 1852de4ff7bSThomas Graf if (o == ops) { 1862de4ff7bSThomas Graf list_del_rcu(&o->list); 1872de4ff7bSThomas Graf goto out; 1882de4ff7bSThomas Graf } 1892de4ff7bSThomas Graf } 1902de4ff7bSThomas Graf 1912de4ff7bSThomas Graf err = -ENOENT; 1922de4ff7bSThomas Graf out: 1932de4ff7bSThomas Graf spin_unlock(&ts_mod_lock); 1942de4ff7bSThomas Graf return err; 1952de4ff7bSThomas Graf } 196ce643a30SFabian Frederick EXPORT_SYMBOL(textsearch_unregister); 1972de4ff7bSThomas Graf 1982de4ff7bSThomas Graf struct ts_linear_state 1992de4ff7bSThomas Graf { 2002de4ff7bSThomas Graf unsigned int len; 2012de4ff7bSThomas Graf const void *data; 2022de4ff7bSThomas Graf }; 2032de4ff7bSThomas Graf 2042de4ff7bSThomas Graf static unsigned int get_linear_data(unsigned int consumed, const u8 **dst, 2052de4ff7bSThomas Graf struct ts_config *conf, 2062de4ff7bSThomas Graf struct ts_state *state) 2072de4ff7bSThomas Graf { 2082de4ff7bSThomas Graf struct ts_linear_state *st = (struct ts_linear_state *) state->cb; 2092de4ff7bSThomas Graf 2102de4ff7bSThomas Graf if (likely(consumed < st->len)) { 2112de4ff7bSThomas Graf *dst = st->data + consumed; 2122de4ff7bSThomas Graf return st->len - consumed; 2132de4ff7bSThomas Graf } 2142de4ff7bSThomas Graf 2152de4ff7bSThomas Graf return 0; 2162de4ff7bSThomas Graf } 2172de4ff7bSThomas Graf 2182de4ff7bSThomas Graf /** 2192de4ff7bSThomas Graf * textsearch_find_continuous - search a pattern in continuous/linear data 2202de4ff7bSThomas Graf * @conf: search configuration 2212de4ff7bSThomas Graf * @state: search state 2222de4ff7bSThomas Graf * @data: data to search in 2232de4ff7bSThomas Graf * @len: length of data 2242de4ff7bSThomas Graf * 2252de4ff7bSThomas Graf * A simplified version of textsearch_find() for continuous/linear data. 2262de4ff7bSThomas Graf * Call textsearch_next() to retrieve subsequent matches. 2272de4ff7bSThomas Graf * 2282de4ff7bSThomas Graf * Returns the position of first occurrence of the pattern or 22972fd4a35SRobert P. J. Day * %UINT_MAX if no occurrence was found. 2302de4ff7bSThomas Graf */ 2312de4ff7bSThomas Graf unsigned int textsearch_find_continuous(struct ts_config *conf, 2322de4ff7bSThomas Graf struct ts_state *state, 2332de4ff7bSThomas Graf const void *data, unsigned int len) 2342de4ff7bSThomas Graf { 2352de4ff7bSThomas Graf struct ts_linear_state *st = (struct ts_linear_state *) state->cb; 2362de4ff7bSThomas Graf 2372de4ff7bSThomas Graf conf->get_next_block = get_linear_data; 2382de4ff7bSThomas Graf st->data = data; 2392de4ff7bSThomas Graf st->len = len; 2402de4ff7bSThomas Graf 2412de4ff7bSThomas Graf return textsearch_find(conf, state); 2422de4ff7bSThomas Graf } 243ce643a30SFabian Frederick EXPORT_SYMBOL(textsearch_find_continuous); 2442de4ff7bSThomas Graf 2452de4ff7bSThomas Graf /** 2462de4ff7bSThomas Graf * textsearch_prepare - Prepare a search 2472de4ff7bSThomas Graf * @algo: name of search algorithm 2482de4ff7bSThomas Graf * @pattern: pattern data 2492de4ff7bSThomas Graf * @len: length of pattern 2502de4ff7bSThomas Graf * @gfp_mask: allocation mask 2512de4ff7bSThomas Graf * @flags: search flags 2522de4ff7bSThomas Graf * 2532de4ff7bSThomas Graf * Looks up the search algorithm module and creates a new textsearch 254fec22908SRaphael Silva * configuration for the specified pattern. 2552de4ff7bSThomas Graf * 2562de4ff7bSThomas Graf * Note: The format of the pattern may not be compatible between 2572de4ff7bSThomas Graf * the various search algorithms. 2582de4ff7bSThomas Graf * 2592de4ff7bSThomas Graf * Returns a new textsearch configuration according to the specified 260e03ba84aSPablo Neira Ayuso * parameters or a ERR_PTR(). If a zero length pattern is passed, this 261e03ba84aSPablo Neira Ayuso * function returns EINVAL. 2622de4ff7bSThomas Graf */ 2632de4ff7bSThomas Graf struct ts_config *textsearch_prepare(const char *algo, const void *pattern, 264fd4f2df2SAl Viro unsigned int len, gfp_t gfp_mask, int flags) 2652de4ff7bSThomas Graf { 2662de4ff7bSThomas Graf int err = -ENOENT; 2672de4ff7bSThomas Graf struct ts_config *conf; 2682de4ff7bSThomas Graf struct ts_ops *ops; 2692de4ff7bSThomas Graf 270e03ba84aSPablo Neira Ayuso if (len == 0) 271e03ba84aSPablo Neira Ayuso return ERR_PTR(-EINVAL); 272e03ba84aSPablo Neira Ayuso 2732de4ff7bSThomas Graf ops = lookup_ts_algo(algo); 274a00caa1fSJohannes Berg #ifdef CONFIG_MODULES 2752de4ff7bSThomas Graf /* 2762de4ff7bSThomas Graf * Why not always autoload you may ask. Some users are 2772de4ff7bSThomas Graf * in a situation where requesting a module may deadlock, 2782de4ff7bSThomas Graf * especially when the module is located on a NFS mount. 2792de4ff7bSThomas Graf */ 2802de4ff7bSThomas Graf if (ops == NULL && flags & TS_AUTOLOAD) { 2812de4ff7bSThomas Graf request_module("ts_%s", algo); 2822de4ff7bSThomas Graf ops = lookup_ts_algo(algo); 2832de4ff7bSThomas Graf } 2842de4ff7bSThomas Graf #endif 2852de4ff7bSThomas Graf 2862de4ff7bSThomas Graf if (ops == NULL) 2872de4ff7bSThomas Graf goto errout; 2882de4ff7bSThomas Graf 289b9c79678SJoonwoo Park conf = ops->init(pattern, len, gfp_mask, flags); 2902de4ff7bSThomas Graf if (IS_ERR(conf)) { 2912de4ff7bSThomas Graf err = PTR_ERR(conf); 2922de4ff7bSThomas Graf goto errout; 2932de4ff7bSThomas Graf } 2942de4ff7bSThomas Graf 2952de4ff7bSThomas Graf conf->ops = ops; 2962de4ff7bSThomas Graf return conf; 2972de4ff7bSThomas Graf 2982de4ff7bSThomas Graf errout: 2992de4ff7bSThomas Graf if (ops) 3002de4ff7bSThomas Graf module_put(ops->owner); 3012de4ff7bSThomas Graf 3022de4ff7bSThomas Graf return ERR_PTR(err); 3032de4ff7bSThomas Graf } 304ce643a30SFabian Frederick EXPORT_SYMBOL(textsearch_prepare); 3052de4ff7bSThomas Graf 3062de4ff7bSThomas Graf /** 3072de4ff7bSThomas Graf * textsearch_destroy - destroy a search configuration 3082de4ff7bSThomas Graf * @conf: search configuration 3092de4ff7bSThomas Graf * 3102de4ff7bSThomas Graf * Releases all references of the configuration and frees 3112de4ff7bSThomas Graf * up the memory. 3122de4ff7bSThomas Graf */ 3132de4ff7bSThomas Graf void textsearch_destroy(struct ts_config *conf) 3142de4ff7bSThomas Graf { 3152de4ff7bSThomas Graf if (conf->ops) { 3162de4ff7bSThomas Graf if (conf->ops->destroy) 3172de4ff7bSThomas Graf conf->ops->destroy(conf); 3182de4ff7bSThomas Graf module_put(conf->ops->owner); 3192de4ff7bSThomas Graf } 3202de4ff7bSThomas Graf 3212de4ff7bSThomas Graf kfree(conf); 3222de4ff7bSThomas Graf } 3232de4ff7bSThomas Graf EXPORT_SYMBOL(textsearch_destroy); 324