xref: /openbmc/linux/fs/unicode/utf8-core.c (revision 49bd03cc)
19d53690fSGabriel Krisman Bertazi /* SPDX-License-Identifier: GPL-2.0 */
29d53690fSGabriel Krisman Bertazi #include <linux/module.h>
39d53690fSGabriel Krisman Bertazi #include <linux/kernel.h>
49d53690fSGabriel Krisman Bertazi #include <linux/string.h>
59d53690fSGabriel Krisman Bertazi #include <linux/slab.h>
69d53690fSGabriel Krisman Bertazi #include <linux/parser.h>
79d53690fSGabriel Krisman Bertazi #include <linux/errno.h>
89d53690fSGabriel Krisman Bertazi #include <linux/unicode.h>
93d7bfea8SDaniel Rosenberg #include <linux/stringhash.h>
109d53690fSGabriel Krisman Bertazi 
119d53690fSGabriel Krisman Bertazi #include "utf8n.h"
129d53690fSGabriel Krisman Bertazi 
139d53690fSGabriel Krisman Bertazi int utf8_validate(const struct unicode_map *um, const struct qstr *str)
149d53690fSGabriel Krisman Bertazi {
159d53690fSGabriel Krisman Bertazi 	const struct utf8data *data = utf8nfdi(um->version);
169d53690fSGabriel Krisman Bertazi 
179d53690fSGabriel Krisman Bertazi 	if (utf8nlen(data, str->name, str->len) < 0)
189d53690fSGabriel Krisman Bertazi 		return -1;
199d53690fSGabriel Krisman Bertazi 	return 0;
209d53690fSGabriel Krisman Bertazi }
219d53690fSGabriel Krisman Bertazi EXPORT_SYMBOL(utf8_validate);
229d53690fSGabriel Krisman Bertazi 
239d53690fSGabriel Krisman Bertazi int utf8_strncmp(const struct unicode_map *um,
249d53690fSGabriel Krisman Bertazi 		 const struct qstr *s1, const struct qstr *s2)
259d53690fSGabriel Krisman Bertazi {
269d53690fSGabriel Krisman Bertazi 	const struct utf8data *data = utf8nfdi(um->version);
279d53690fSGabriel Krisman Bertazi 	struct utf8cursor cur1, cur2;
289d53690fSGabriel Krisman Bertazi 	int c1, c2;
299d53690fSGabriel Krisman Bertazi 
309d53690fSGabriel Krisman Bertazi 	if (utf8ncursor(&cur1, data, s1->name, s1->len) < 0)
319d53690fSGabriel Krisman Bertazi 		return -EINVAL;
329d53690fSGabriel Krisman Bertazi 
339d53690fSGabriel Krisman Bertazi 	if (utf8ncursor(&cur2, data, s2->name, s2->len) < 0)
349d53690fSGabriel Krisman Bertazi 		return -EINVAL;
359d53690fSGabriel Krisman Bertazi 
369d53690fSGabriel Krisman Bertazi 	do {
379d53690fSGabriel Krisman Bertazi 		c1 = utf8byte(&cur1);
389d53690fSGabriel Krisman Bertazi 		c2 = utf8byte(&cur2);
399d53690fSGabriel Krisman Bertazi 
409d53690fSGabriel Krisman Bertazi 		if (c1 < 0 || c2 < 0)
419d53690fSGabriel Krisman Bertazi 			return -EINVAL;
429d53690fSGabriel Krisman Bertazi 		if (c1 != c2)
439d53690fSGabriel Krisman Bertazi 			return 1;
449d53690fSGabriel Krisman Bertazi 	} while (c1);
459d53690fSGabriel Krisman Bertazi 
469d53690fSGabriel Krisman Bertazi 	return 0;
479d53690fSGabriel Krisman Bertazi }
489d53690fSGabriel Krisman Bertazi EXPORT_SYMBOL(utf8_strncmp);
499d53690fSGabriel Krisman Bertazi 
509d53690fSGabriel Krisman Bertazi int utf8_strncasecmp(const struct unicode_map *um,
519d53690fSGabriel Krisman Bertazi 		     const struct qstr *s1, const struct qstr *s2)
529d53690fSGabriel Krisman Bertazi {
539d53690fSGabriel Krisman Bertazi 	const struct utf8data *data = utf8nfdicf(um->version);
549d53690fSGabriel Krisman Bertazi 	struct utf8cursor cur1, cur2;
559d53690fSGabriel Krisman Bertazi 	int c1, c2;
569d53690fSGabriel Krisman Bertazi 
579d53690fSGabriel Krisman Bertazi 	if (utf8ncursor(&cur1, data, s1->name, s1->len) < 0)
589d53690fSGabriel Krisman Bertazi 		return -EINVAL;
599d53690fSGabriel Krisman Bertazi 
609d53690fSGabriel Krisman Bertazi 	if (utf8ncursor(&cur2, data, s2->name, s2->len) < 0)
619d53690fSGabriel Krisman Bertazi 		return -EINVAL;
629d53690fSGabriel Krisman Bertazi 
639d53690fSGabriel Krisman Bertazi 	do {
649d53690fSGabriel Krisman Bertazi 		c1 = utf8byte(&cur1);
659d53690fSGabriel Krisman Bertazi 		c2 = utf8byte(&cur2);
669d53690fSGabriel Krisman Bertazi 
679d53690fSGabriel Krisman Bertazi 		if (c1 < 0 || c2 < 0)
689d53690fSGabriel Krisman Bertazi 			return -EINVAL;
699d53690fSGabriel Krisman Bertazi 		if (c1 != c2)
709d53690fSGabriel Krisman Bertazi 			return 1;
719d53690fSGabriel Krisman Bertazi 	} while (c1);
729d53690fSGabriel Krisman Bertazi 
739d53690fSGabriel Krisman Bertazi 	return 0;
749d53690fSGabriel Krisman Bertazi }
759d53690fSGabriel Krisman Bertazi EXPORT_SYMBOL(utf8_strncasecmp);
769d53690fSGabriel Krisman Bertazi 
773ae72562SGabriel Krisman Bertazi /* String cf is expected to be a valid UTF-8 casefolded
783ae72562SGabriel Krisman Bertazi  * string.
793ae72562SGabriel Krisman Bertazi  */
803ae72562SGabriel Krisman Bertazi int utf8_strncasecmp_folded(const struct unicode_map *um,
813ae72562SGabriel Krisman Bertazi 			    const struct qstr *cf,
823ae72562SGabriel Krisman Bertazi 			    const struct qstr *s1)
833ae72562SGabriel Krisman Bertazi {
843ae72562SGabriel Krisman Bertazi 	const struct utf8data *data = utf8nfdicf(um->version);
853ae72562SGabriel Krisman Bertazi 	struct utf8cursor cur1;
863ae72562SGabriel Krisman Bertazi 	int c1, c2;
873ae72562SGabriel Krisman Bertazi 	int i = 0;
883ae72562SGabriel Krisman Bertazi 
893ae72562SGabriel Krisman Bertazi 	if (utf8ncursor(&cur1, data, s1->name, s1->len) < 0)
903ae72562SGabriel Krisman Bertazi 		return -EINVAL;
913ae72562SGabriel Krisman Bertazi 
923ae72562SGabriel Krisman Bertazi 	do {
933ae72562SGabriel Krisman Bertazi 		c1 = utf8byte(&cur1);
943ae72562SGabriel Krisman Bertazi 		c2 = cf->name[i++];
953ae72562SGabriel Krisman Bertazi 		if (c1 < 0)
963ae72562SGabriel Krisman Bertazi 			return -EINVAL;
973ae72562SGabriel Krisman Bertazi 		if (c1 != c2)
983ae72562SGabriel Krisman Bertazi 			return 1;
993ae72562SGabriel Krisman Bertazi 	} while (c1);
1003ae72562SGabriel Krisman Bertazi 
1013ae72562SGabriel Krisman Bertazi 	return 0;
1023ae72562SGabriel Krisman Bertazi }
1033ae72562SGabriel Krisman Bertazi EXPORT_SYMBOL(utf8_strncasecmp_folded);
1043ae72562SGabriel Krisman Bertazi 
1059d53690fSGabriel Krisman Bertazi int utf8_casefold(const struct unicode_map *um, const struct qstr *str,
1069d53690fSGabriel Krisman Bertazi 		  unsigned char *dest, size_t dlen)
1079d53690fSGabriel Krisman Bertazi {
1089d53690fSGabriel Krisman Bertazi 	const struct utf8data *data = utf8nfdicf(um->version);
1099d53690fSGabriel Krisman Bertazi 	struct utf8cursor cur;
1109d53690fSGabriel Krisman Bertazi 	size_t nlen = 0;
1119d53690fSGabriel Krisman Bertazi 
1129d53690fSGabriel Krisman Bertazi 	if (utf8ncursor(&cur, data, str->name, str->len) < 0)
1139d53690fSGabriel Krisman Bertazi 		return -EINVAL;
1149d53690fSGabriel Krisman Bertazi 
1159d53690fSGabriel Krisman Bertazi 	for (nlen = 0; nlen < dlen; nlen++) {
1169d53690fSGabriel Krisman Bertazi 		int c = utf8byte(&cur);
1179d53690fSGabriel Krisman Bertazi 
1189d53690fSGabriel Krisman Bertazi 		dest[nlen] = c;
1199d53690fSGabriel Krisman Bertazi 		if (!c)
1209d53690fSGabriel Krisman Bertazi 			return nlen;
1219d53690fSGabriel Krisman Bertazi 		if (c == -1)
1229d53690fSGabriel Krisman Bertazi 			break;
1239d53690fSGabriel Krisman Bertazi 	}
1249d53690fSGabriel Krisman Bertazi 	return -EINVAL;
1259d53690fSGabriel Krisman Bertazi }
1269d53690fSGabriel Krisman Bertazi EXPORT_SYMBOL(utf8_casefold);
1279d53690fSGabriel Krisman Bertazi 
1283d7bfea8SDaniel Rosenberg int utf8_casefold_hash(const struct unicode_map *um, const void *salt,
1293d7bfea8SDaniel Rosenberg 		       struct qstr *str)
1303d7bfea8SDaniel Rosenberg {
1313d7bfea8SDaniel Rosenberg 	const struct utf8data *data = utf8nfdicf(um->version);
1323d7bfea8SDaniel Rosenberg 	struct utf8cursor cur;
1333d7bfea8SDaniel Rosenberg 	int c;
1343d7bfea8SDaniel Rosenberg 	unsigned long hash = init_name_hash(salt);
1353d7bfea8SDaniel Rosenberg 
1363d7bfea8SDaniel Rosenberg 	if (utf8ncursor(&cur, data, str->name, str->len) < 0)
1373d7bfea8SDaniel Rosenberg 		return -EINVAL;
1383d7bfea8SDaniel Rosenberg 
1393d7bfea8SDaniel Rosenberg 	while ((c = utf8byte(&cur))) {
1403d7bfea8SDaniel Rosenberg 		if (c < 0)
1413d7bfea8SDaniel Rosenberg 			return -EINVAL;
1423d7bfea8SDaniel Rosenberg 		hash = partial_name_hash((unsigned char)c, hash);
1433d7bfea8SDaniel Rosenberg 	}
1443d7bfea8SDaniel Rosenberg 	str->hash = end_name_hash(hash);
1453d7bfea8SDaniel Rosenberg 	return 0;
1463d7bfea8SDaniel Rosenberg }
1473d7bfea8SDaniel Rosenberg EXPORT_SYMBOL(utf8_casefold_hash);
1483d7bfea8SDaniel Rosenberg 
1499d53690fSGabriel Krisman Bertazi int utf8_normalize(const struct unicode_map *um, const struct qstr *str,
1509d53690fSGabriel Krisman Bertazi 		   unsigned char *dest, size_t dlen)
1519d53690fSGabriel Krisman Bertazi {
1529d53690fSGabriel Krisman Bertazi 	const struct utf8data *data = utf8nfdi(um->version);
1539d53690fSGabriel Krisman Bertazi 	struct utf8cursor cur;
1549d53690fSGabriel Krisman Bertazi 	ssize_t nlen = 0;
1559d53690fSGabriel Krisman Bertazi 
1569d53690fSGabriel Krisman Bertazi 	if (utf8ncursor(&cur, data, str->name, str->len) < 0)
1579d53690fSGabriel Krisman Bertazi 		return -EINVAL;
1589d53690fSGabriel Krisman Bertazi 
1599d53690fSGabriel Krisman Bertazi 	for (nlen = 0; nlen < dlen; nlen++) {
1609d53690fSGabriel Krisman Bertazi 		int c = utf8byte(&cur);
1619d53690fSGabriel Krisman Bertazi 
1629d53690fSGabriel Krisman Bertazi 		dest[nlen] = c;
1639d53690fSGabriel Krisman Bertazi 		if (!c)
1649d53690fSGabriel Krisman Bertazi 			return nlen;
1659d53690fSGabriel Krisman Bertazi 		if (c == -1)
1669d53690fSGabriel Krisman Bertazi 			break;
1679d53690fSGabriel Krisman Bertazi 	}
1689d53690fSGabriel Krisman Bertazi 	return -EINVAL;
1699d53690fSGabriel Krisman Bertazi }
1709d53690fSGabriel Krisman Bertazi EXPORT_SYMBOL(utf8_normalize);
1719d53690fSGabriel Krisman Bertazi 
172*49bd03ccSChristoph Hellwig struct unicode_map *utf8_load(unsigned int version)
1739d53690fSGabriel Krisman Bertazi {
174*49bd03ccSChristoph Hellwig 	struct unicode_map *um;
1759d53690fSGabriel Krisman Bertazi 
176*49bd03ccSChristoph Hellwig 	if (!utf8version_is_supported(version))
1779d53690fSGabriel Krisman Bertazi 		return ERR_PTR(-EINVAL);
1789d53690fSGabriel Krisman Bertazi 
1799d53690fSGabriel Krisman Bertazi 	um = kzalloc(sizeof(struct unicode_map), GFP_KERNEL);
1809d53690fSGabriel Krisman Bertazi 	if (!um)
1819d53690fSGabriel Krisman Bertazi 		return ERR_PTR(-ENOMEM);
182*49bd03ccSChristoph Hellwig 	um->version = version;
1839d53690fSGabriel Krisman Bertazi 	return um;
1849d53690fSGabriel Krisman Bertazi }
1859d53690fSGabriel Krisman Bertazi EXPORT_SYMBOL(utf8_load);
1869d53690fSGabriel Krisman Bertazi 
/*
 * utf8_unload - release a map obtained from utf8_load().
 * @um: map to free; NULL and ERR_PTR() values are ignored
 *
 * utf8_load() reports failure through ERR_PTR(), so a caller's cleanup
 * path may hand such a value back here; it must not reach kfree().
 */
void utf8_unload(struct unicode_map *um)
{
	if (IS_ERR_OR_NULL(um))
		return;

	kfree(um);
}
EXPORT_SYMBOL(utf8_unload);
1929d53690fSGabriel Krisman Bertazi 
1939d53690fSGabriel Krisman Bertazi MODULE_LICENSE("GPL v2");
194