xref: /openbmc/linux/fs/unicode/utf8-core.c (revision ecc23d0a422a3118fcf6e4f0a46e17a6c2047b02)
19d53690fSGabriel Krisman Bertazi /* SPDX-License-Identifier: GPL-2.0 */
29d53690fSGabriel Krisman Bertazi #include <linux/module.h>
39d53690fSGabriel Krisman Bertazi #include <linux/kernel.h>
49d53690fSGabriel Krisman Bertazi #include <linux/string.h>
59d53690fSGabriel Krisman Bertazi #include <linux/slab.h>
69d53690fSGabriel Krisman Bertazi #include <linux/parser.h>
79d53690fSGabriel Krisman Bertazi #include <linux/errno.h>
83d7bfea8SDaniel Rosenberg #include <linux/stringhash.h>
99d53690fSGabriel Krisman Bertazi 
109d53690fSGabriel Krisman Bertazi #include "utf8n.h"
119d53690fSGabriel Krisman Bertazi 
utf8_validate(const struct unicode_map * um,const struct qstr * str)129d53690fSGabriel Krisman Bertazi int utf8_validate(const struct unicode_map *um, const struct qstr *str)
139d53690fSGabriel Krisman Bertazi {
146ca99ce7SChristoph Hellwig 	if (utf8nlen(um, UTF8_NFDI, str->name, str->len) < 0)
159d53690fSGabriel Krisman Bertazi 		return -1;
169d53690fSGabriel Krisman Bertazi 	return 0;
179d53690fSGabriel Krisman Bertazi }
189d53690fSGabriel Krisman Bertazi EXPORT_SYMBOL(utf8_validate);
199d53690fSGabriel Krisman Bertazi 
utf8_strncmp(const struct unicode_map * um,const struct qstr * s1,const struct qstr * s2)209d53690fSGabriel Krisman Bertazi int utf8_strncmp(const struct unicode_map *um,
219d53690fSGabriel Krisman Bertazi 		 const struct qstr *s1, const struct qstr *s2)
229d53690fSGabriel Krisman Bertazi {
239d53690fSGabriel Krisman Bertazi 	struct utf8cursor cur1, cur2;
249d53690fSGabriel Krisman Bertazi 	int c1, c2;
259d53690fSGabriel Krisman Bertazi 
266ca99ce7SChristoph Hellwig 	if (utf8ncursor(&cur1, um, UTF8_NFDI, s1->name, s1->len) < 0)
279d53690fSGabriel Krisman Bertazi 		return -EINVAL;
289d53690fSGabriel Krisman Bertazi 
296ca99ce7SChristoph Hellwig 	if (utf8ncursor(&cur2, um, UTF8_NFDI, s2->name, s2->len) < 0)
309d53690fSGabriel Krisman Bertazi 		return -EINVAL;
319d53690fSGabriel Krisman Bertazi 
329d53690fSGabriel Krisman Bertazi 	do {
339d53690fSGabriel Krisman Bertazi 		c1 = utf8byte(&cur1);
349d53690fSGabriel Krisman Bertazi 		c2 = utf8byte(&cur2);
359d53690fSGabriel Krisman Bertazi 
369d53690fSGabriel Krisman Bertazi 		if (c1 < 0 || c2 < 0)
379d53690fSGabriel Krisman Bertazi 			return -EINVAL;
389d53690fSGabriel Krisman Bertazi 		if (c1 != c2)
399d53690fSGabriel Krisman Bertazi 			return 1;
409d53690fSGabriel Krisman Bertazi 	} while (c1);
419d53690fSGabriel Krisman Bertazi 
429d53690fSGabriel Krisman Bertazi 	return 0;
439d53690fSGabriel Krisman Bertazi }
449d53690fSGabriel Krisman Bertazi EXPORT_SYMBOL(utf8_strncmp);
459d53690fSGabriel Krisman Bertazi 
utf8_strncasecmp(const struct unicode_map * um,const struct qstr * s1,const struct qstr * s2)469d53690fSGabriel Krisman Bertazi int utf8_strncasecmp(const struct unicode_map *um,
479d53690fSGabriel Krisman Bertazi 		     const struct qstr *s1, const struct qstr *s2)
489d53690fSGabriel Krisman Bertazi {
499d53690fSGabriel Krisman Bertazi 	struct utf8cursor cur1, cur2;
509d53690fSGabriel Krisman Bertazi 	int c1, c2;
519d53690fSGabriel Krisman Bertazi 
526ca99ce7SChristoph Hellwig 	if (utf8ncursor(&cur1, um, UTF8_NFDICF, s1->name, s1->len) < 0)
539d53690fSGabriel Krisman Bertazi 		return -EINVAL;
549d53690fSGabriel Krisman Bertazi 
556ca99ce7SChristoph Hellwig 	if (utf8ncursor(&cur2, um, UTF8_NFDICF, s2->name, s2->len) < 0)
569d53690fSGabriel Krisman Bertazi 		return -EINVAL;
579d53690fSGabriel Krisman Bertazi 
589d53690fSGabriel Krisman Bertazi 	do {
599d53690fSGabriel Krisman Bertazi 		c1 = utf8byte(&cur1);
609d53690fSGabriel Krisman Bertazi 		c2 = utf8byte(&cur2);
619d53690fSGabriel Krisman Bertazi 
629d53690fSGabriel Krisman Bertazi 		if (c1 < 0 || c2 < 0)
639d53690fSGabriel Krisman Bertazi 			return -EINVAL;
649d53690fSGabriel Krisman Bertazi 		if (c1 != c2)
659d53690fSGabriel Krisman Bertazi 			return 1;
669d53690fSGabriel Krisman Bertazi 	} while (c1);
679d53690fSGabriel Krisman Bertazi 
689d53690fSGabriel Krisman Bertazi 	return 0;
699d53690fSGabriel Krisman Bertazi }
709d53690fSGabriel Krisman Bertazi EXPORT_SYMBOL(utf8_strncasecmp);
719d53690fSGabriel Krisman Bertazi 
723ae72562SGabriel Krisman Bertazi /* String cf is expected to be a valid UTF-8 casefolded
733ae72562SGabriel Krisman Bertazi  * string.
743ae72562SGabriel Krisman Bertazi  */
utf8_strncasecmp_folded(const struct unicode_map * um,const struct qstr * cf,const struct qstr * s1)753ae72562SGabriel Krisman Bertazi int utf8_strncasecmp_folded(const struct unicode_map *um,
763ae72562SGabriel Krisman Bertazi 			    const struct qstr *cf,
773ae72562SGabriel Krisman Bertazi 			    const struct qstr *s1)
783ae72562SGabriel Krisman Bertazi {
793ae72562SGabriel Krisman Bertazi 	struct utf8cursor cur1;
803ae72562SGabriel Krisman Bertazi 	int c1, c2;
813ae72562SGabriel Krisman Bertazi 	int i = 0;
823ae72562SGabriel Krisman Bertazi 
836ca99ce7SChristoph Hellwig 	if (utf8ncursor(&cur1, um, UTF8_NFDICF, s1->name, s1->len) < 0)
843ae72562SGabriel Krisman Bertazi 		return -EINVAL;
853ae72562SGabriel Krisman Bertazi 
863ae72562SGabriel Krisman Bertazi 	do {
873ae72562SGabriel Krisman Bertazi 		c1 = utf8byte(&cur1);
883ae72562SGabriel Krisman Bertazi 		c2 = cf->name[i++];
893ae72562SGabriel Krisman Bertazi 		if (c1 < 0)
903ae72562SGabriel Krisman Bertazi 			return -EINVAL;
913ae72562SGabriel Krisman Bertazi 		if (c1 != c2)
923ae72562SGabriel Krisman Bertazi 			return 1;
933ae72562SGabriel Krisman Bertazi 	} while (c1);
943ae72562SGabriel Krisman Bertazi 
953ae72562SGabriel Krisman Bertazi 	return 0;
963ae72562SGabriel Krisman Bertazi }
973ae72562SGabriel Krisman Bertazi EXPORT_SYMBOL(utf8_strncasecmp_folded);
983ae72562SGabriel Krisman Bertazi 
/**
 * utf8_casefold - write the UTF8_NFDICF byte stream of a name into a buffer
 * @um:   unicode map used to set up the cursor
 * @str:  name to convert
 * @dest: output buffer
 * @dlen: size of @dest in bytes
 *
 * Bytes produced by utf8byte() are copied to @dest, including the
 * terminating 0.
 *
 * Return: number of bytes stored before the terminating 0 on success,
 * -EINVAL if the cursor cannot be set up, utf8byte() reports an error, or
 * @dest is full before the terminating 0 could be stored.
 */
int utf8_casefold(const struct unicode_map *um, const struct qstr *str,
		  unsigned char *dest, size_t dlen)
{
	struct utf8cursor cur;
	size_t nlen;

	if (utf8ncursor(&cur, um, UTF8_NFDICF, str->name, str->len) < 0)
		return -EINVAL;

	for (nlen = 0; nlen < dlen; nlen++) {
		int c = utf8byte(&cur);

		/* Check first: an error code must not end up in the output. */
		if (c < 0)
			return -EINVAL;

		dest[nlen] = c;
		if (!c)
			return nlen;
	}

	/* Ran out of room before the terminating 0 was written. */
	return -EINVAL;
}
EXPORT_SYMBOL(utf8_casefold);
1209d53690fSGabriel Krisman Bertazi 
utf8_casefold_hash(const struct unicode_map * um,const void * salt,struct qstr * str)1213d7bfea8SDaniel Rosenberg int utf8_casefold_hash(const struct unicode_map *um, const void *salt,
1223d7bfea8SDaniel Rosenberg 		       struct qstr *str)
1233d7bfea8SDaniel Rosenberg {
1243d7bfea8SDaniel Rosenberg 	struct utf8cursor cur;
1253d7bfea8SDaniel Rosenberg 	int c;
1263d7bfea8SDaniel Rosenberg 	unsigned long hash = init_name_hash(salt);
1273d7bfea8SDaniel Rosenberg 
1286ca99ce7SChristoph Hellwig 	if (utf8ncursor(&cur, um, UTF8_NFDICF, str->name, str->len) < 0)
1293d7bfea8SDaniel Rosenberg 		return -EINVAL;
1303d7bfea8SDaniel Rosenberg 
1313d7bfea8SDaniel Rosenberg 	while ((c = utf8byte(&cur))) {
1323d7bfea8SDaniel Rosenberg 		if (c < 0)
1333d7bfea8SDaniel Rosenberg 			return -EINVAL;
1343d7bfea8SDaniel Rosenberg 		hash = partial_name_hash((unsigned char)c, hash);
1353d7bfea8SDaniel Rosenberg 	}
1363d7bfea8SDaniel Rosenberg 	str->hash = end_name_hash(hash);
1373d7bfea8SDaniel Rosenberg 	return 0;
1383d7bfea8SDaniel Rosenberg }
1393d7bfea8SDaniel Rosenberg EXPORT_SYMBOL(utf8_casefold_hash);
1403d7bfea8SDaniel Rosenberg 
/**
 * utf8_normalize - write the UTF8_NFDI byte stream of a name into a buffer
 * @um:   unicode map used to set up the cursor
 * @str:  name to convert
 * @dest: output buffer
 * @dlen: size of @dest in bytes
 *
 * Bytes produced by utf8byte() are copied to @dest, including the
 * terminating 0.
 *
 * Return: number of bytes stored before the terminating 0 on success,
 * -EINVAL if the cursor cannot be set up, utf8byte() reports an error, or
 * @dest is full before the terminating 0 could be stored.
 */
int utf8_normalize(const struct unicode_map *um, const struct qstr *str,
		   unsigned char *dest, size_t dlen)
{
	struct utf8cursor cur;
	size_t nlen;	/* unsigned, like @dlen, to avoid a mixed-sign compare */

	if (utf8ncursor(&cur, um, UTF8_NFDI, str->name, str->len) < 0)
		return -EINVAL;

	for (nlen = 0; nlen < dlen; nlen++) {
		int c = utf8byte(&cur);

		/* Check first: an error code must not end up in the output. */
		if (c < 0)
			return -EINVAL;

		dest[nlen] = c;
		if (!c)
			return nlen;
	}

	/* Ran out of room before the terminating 0 was written. */
	return -EINVAL;
}
EXPORT_SYMBOL(utf8_normalize);
1629d53690fSGabriel Krisman Bertazi 
/*
 * Find the entry of @table whose maxage is exactly @version.
 *
 * The table is scanned from its last entry towards the first, skipping
 * entries whose maxage is greater than @version.  The first entry that is
 * not greater decides the result: it is returned when its maxage equals
 * @version, otherwise NULL is returned.
 * NOTE(review): this scan presumably relies on the table being sorted by
 * ascending maxage - confirm against the generated data.
 *
 * NULL is also returned for an empty table or when every entry is newer
 * than @version, rather than indexing below the start of the table.
 */
static const struct utf8data *find_table_version(const struct utf8data *table,
		size_t nr_entries, unsigned int version)
{
	size_t i;

	for (i = nr_entries; i-- > 0; ) {
		if (table[i].maxage > version)
			continue;
		return table[i].maxage == version ? &table[i] : NULL;
	}

	return NULL;
}
1742b3d0478SChristoph Hellwig 
utf8_load(unsigned int version)17549bd03ccSChristoph Hellwig struct unicode_map *utf8_load(unsigned int version)
1769d53690fSGabriel Krisman Bertazi {
17749bd03ccSChristoph Hellwig 	struct unicode_map *um;
1789d53690fSGabriel Krisman Bertazi 
1799d53690fSGabriel Krisman Bertazi 	um = kzalloc(sizeof(struct unicode_map), GFP_KERNEL);
1809d53690fSGabriel Krisman Bertazi 	if (!um)
1819d53690fSGabriel Krisman Bertazi 		return ERR_PTR(-ENOMEM);
18249bd03ccSChristoph Hellwig 	um->version = version;
1832b3d0478SChristoph Hellwig 
1842b3d0478SChristoph Hellwig 	um->tables = symbol_request(utf8_data_table);
1852b3d0478SChristoph Hellwig 	if (!um->tables)
1862b3d0478SChristoph Hellwig 		goto out_free_um;
1872b3d0478SChristoph Hellwig 
1882b3d0478SChristoph Hellwig 	if (!utf8version_is_supported(um, version))
1892b3d0478SChristoph Hellwig 		goto out_symbol_put;
1902b3d0478SChristoph Hellwig 	um->ntab[UTF8_NFDI] = find_table_version(um->tables->utf8nfdidata,
1912b3d0478SChristoph Hellwig 			um->tables->utf8nfdidata_size, um->version);
1926ca99ce7SChristoph Hellwig 	if (!um->ntab[UTF8_NFDI])
1932b3d0478SChristoph Hellwig 		goto out_symbol_put;
1942b3d0478SChristoph Hellwig 	um->ntab[UTF8_NFDICF] = find_table_version(um->tables->utf8nfdicfdata,
1952b3d0478SChristoph Hellwig 			um->tables->utf8nfdicfdata_size, um->version);
1966ca99ce7SChristoph Hellwig 	if (!um->ntab[UTF8_NFDICF])
1972b3d0478SChristoph Hellwig 		goto out_symbol_put;
1989d53690fSGabriel Krisman Bertazi 	return um;
1996ca99ce7SChristoph Hellwig 
2002b3d0478SChristoph Hellwig out_symbol_put:
201*c4b6c178SAndré Almeida 	symbol_put(utf8_data_table);
2026ca99ce7SChristoph Hellwig out_free_um:
2036ca99ce7SChristoph Hellwig 	kfree(um);
2046ca99ce7SChristoph Hellwig 	return ERR_PTR(-EINVAL);
2059d53690fSGabriel Krisman Bertazi }
2069d53690fSGabriel Krisman Bertazi EXPORT_SYMBOL(utf8_load);
2079d53690fSGabriel Krisman Bertazi 
utf8_unload(struct unicode_map * um)2089d53690fSGabriel Krisman Bertazi void utf8_unload(struct unicode_map *um)
2099d53690fSGabriel Krisman Bertazi {
2102b3d0478SChristoph Hellwig 	if (um) {
2112b3d0478SChristoph Hellwig 		symbol_put(utf8_data_table);
2129d53690fSGabriel Krisman Bertazi 		kfree(um);
2139d53690fSGabriel Krisman Bertazi 	}
2142b3d0478SChristoph Hellwig }
2159d53690fSGabriel Krisman Bertazi EXPORT_SYMBOL(utf8_unload);
2169d53690fSGabriel Krisman Bertazi 
217