xref: /openbmc/linux/fs/unicode/utf8-core.c (revision 6ca99ce756c27852d1ea1e555045de1c920f30ed)
19d53690fSGabriel Krisman Bertazi /* SPDX-License-Identifier: GPL-2.0 */
29d53690fSGabriel Krisman Bertazi #include <linux/module.h>
39d53690fSGabriel Krisman Bertazi #include <linux/kernel.h>
49d53690fSGabriel Krisman Bertazi #include <linux/string.h>
59d53690fSGabriel Krisman Bertazi #include <linux/slab.h>
69d53690fSGabriel Krisman Bertazi #include <linux/parser.h>
79d53690fSGabriel Krisman Bertazi #include <linux/errno.h>
83d7bfea8SDaniel Rosenberg #include <linux/stringhash.h>
99d53690fSGabriel Krisman Bertazi 
109d53690fSGabriel Krisman Bertazi #include "utf8n.h"
119d53690fSGabriel Krisman Bertazi 
129d53690fSGabriel Krisman Bertazi int utf8_validate(const struct unicode_map *um, const struct qstr *str)
139d53690fSGabriel Krisman Bertazi {
14*6ca99ce7SChristoph Hellwig 	if (utf8nlen(um, UTF8_NFDI, str->name, str->len) < 0)
159d53690fSGabriel Krisman Bertazi 		return -1;
169d53690fSGabriel Krisman Bertazi 	return 0;
179d53690fSGabriel Krisman Bertazi }
189d53690fSGabriel Krisman Bertazi EXPORT_SYMBOL(utf8_validate);
199d53690fSGabriel Krisman Bertazi 
209d53690fSGabriel Krisman Bertazi int utf8_strncmp(const struct unicode_map *um,
219d53690fSGabriel Krisman Bertazi 		 const struct qstr *s1, const struct qstr *s2)
229d53690fSGabriel Krisman Bertazi {
239d53690fSGabriel Krisman Bertazi 	struct utf8cursor cur1, cur2;
249d53690fSGabriel Krisman Bertazi 	int c1, c2;
259d53690fSGabriel Krisman Bertazi 
26*6ca99ce7SChristoph Hellwig 	if (utf8ncursor(&cur1, um, UTF8_NFDI, s1->name, s1->len) < 0)
279d53690fSGabriel Krisman Bertazi 		return -EINVAL;
289d53690fSGabriel Krisman Bertazi 
29*6ca99ce7SChristoph Hellwig 	if (utf8ncursor(&cur2, um, UTF8_NFDI, s2->name, s2->len) < 0)
309d53690fSGabriel Krisman Bertazi 		return -EINVAL;
319d53690fSGabriel Krisman Bertazi 
329d53690fSGabriel Krisman Bertazi 	do {
339d53690fSGabriel Krisman Bertazi 		c1 = utf8byte(&cur1);
349d53690fSGabriel Krisman Bertazi 		c2 = utf8byte(&cur2);
359d53690fSGabriel Krisman Bertazi 
369d53690fSGabriel Krisman Bertazi 		if (c1 < 0 || c2 < 0)
379d53690fSGabriel Krisman Bertazi 			return -EINVAL;
389d53690fSGabriel Krisman Bertazi 		if (c1 != c2)
399d53690fSGabriel Krisman Bertazi 			return 1;
409d53690fSGabriel Krisman Bertazi 	} while (c1);
419d53690fSGabriel Krisman Bertazi 
429d53690fSGabriel Krisman Bertazi 	return 0;
439d53690fSGabriel Krisman Bertazi }
449d53690fSGabriel Krisman Bertazi EXPORT_SYMBOL(utf8_strncmp);
459d53690fSGabriel Krisman Bertazi 
469d53690fSGabriel Krisman Bertazi int utf8_strncasecmp(const struct unicode_map *um,
479d53690fSGabriel Krisman Bertazi 		     const struct qstr *s1, const struct qstr *s2)
489d53690fSGabriel Krisman Bertazi {
499d53690fSGabriel Krisman Bertazi 	struct utf8cursor cur1, cur2;
509d53690fSGabriel Krisman Bertazi 	int c1, c2;
519d53690fSGabriel Krisman Bertazi 
52*6ca99ce7SChristoph Hellwig 	if (utf8ncursor(&cur1, um, UTF8_NFDICF, s1->name, s1->len) < 0)
539d53690fSGabriel Krisman Bertazi 		return -EINVAL;
549d53690fSGabriel Krisman Bertazi 
55*6ca99ce7SChristoph Hellwig 	if (utf8ncursor(&cur2, um, UTF8_NFDICF, s2->name, s2->len) < 0)
569d53690fSGabriel Krisman Bertazi 		return -EINVAL;
579d53690fSGabriel Krisman Bertazi 
589d53690fSGabriel Krisman Bertazi 	do {
599d53690fSGabriel Krisman Bertazi 		c1 = utf8byte(&cur1);
609d53690fSGabriel Krisman Bertazi 		c2 = utf8byte(&cur2);
619d53690fSGabriel Krisman Bertazi 
629d53690fSGabriel Krisman Bertazi 		if (c1 < 0 || c2 < 0)
639d53690fSGabriel Krisman Bertazi 			return -EINVAL;
649d53690fSGabriel Krisman Bertazi 		if (c1 != c2)
659d53690fSGabriel Krisman Bertazi 			return 1;
669d53690fSGabriel Krisman Bertazi 	} while (c1);
679d53690fSGabriel Krisman Bertazi 
689d53690fSGabriel Krisman Bertazi 	return 0;
699d53690fSGabriel Krisman Bertazi }
709d53690fSGabriel Krisman Bertazi EXPORT_SYMBOL(utf8_strncasecmp);
719d53690fSGabriel Krisman Bertazi 
723ae72562SGabriel Krisman Bertazi /* String cf is expected to be a valid UTF-8 casefolded
733ae72562SGabriel Krisman Bertazi  * string.
743ae72562SGabriel Krisman Bertazi  */
753ae72562SGabriel Krisman Bertazi int utf8_strncasecmp_folded(const struct unicode_map *um,
763ae72562SGabriel Krisman Bertazi 			    const struct qstr *cf,
773ae72562SGabriel Krisman Bertazi 			    const struct qstr *s1)
783ae72562SGabriel Krisman Bertazi {
793ae72562SGabriel Krisman Bertazi 	struct utf8cursor cur1;
803ae72562SGabriel Krisman Bertazi 	int c1, c2;
813ae72562SGabriel Krisman Bertazi 	int i = 0;
823ae72562SGabriel Krisman Bertazi 
83*6ca99ce7SChristoph Hellwig 	if (utf8ncursor(&cur1, um, UTF8_NFDICF, s1->name, s1->len) < 0)
843ae72562SGabriel Krisman Bertazi 		return -EINVAL;
853ae72562SGabriel Krisman Bertazi 
863ae72562SGabriel Krisman Bertazi 	do {
873ae72562SGabriel Krisman Bertazi 		c1 = utf8byte(&cur1);
883ae72562SGabriel Krisman Bertazi 		c2 = cf->name[i++];
893ae72562SGabriel Krisman Bertazi 		if (c1 < 0)
903ae72562SGabriel Krisman Bertazi 			return -EINVAL;
913ae72562SGabriel Krisman Bertazi 		if (c1 != c2)
923ae72562SGabriel Krisman Bertazi 			return 1;
933ae72562SGabriel Krisman Bertazi 	} while (c1);
943ae72562SGabriel Krisman Bertazi 
953ae72562SGabriel Krisman Bertazi 	return 0;
963ae72562SGabriel Krisman Bertazi }
973ae72562SGabriel Krisman Bertazi EXPORT_SYMBOL(utf8_strncasecmp_folded);
983ae72562SGabriel Krisman Bertazi 
999d53690fSGabriel Krisman Bertazi int utf8_casefold(const struct unicode_map *um, const struct qstr *str,
1009d53690fSGabriel Krisman Bertazi 		  unsigned char *dest, size_t dlen)
1019d53690fSGabriel Krisman Bertazi {
1029d53690fSGabriel Krisman Bertazi 	struct utf8cursor cur;
1039d53690fSGabriel Krisman Bertazi 	size_t nlen = 0;
1049d53690fSGabriel Krisman Bertazi 
105*6ca99ce7SChristoph Hellwig 	if (utf8ncursor(&cur, um, UTF8_NFDICF, str->name, str->len) < 0)
1069d53690fSGabriel Krisman Bertazi 		return -EINVAL;
1079d53690fSGabriel Krisman Bertazi 
1089d53690fSGabriel Krisman Bertazi 	for (nlen = 0; nlen < dlen; nlen++) {
1099d53690fSGabriel Krisman Bertazi 		int c = utf8byte(&cur);
1109d53690fSGabriel Krisman Bertazi 
1119d53690fSGabriel Krisman Bertazi 		dest[nlen] = c;
1129d53690fSGabriel Krisman Bertazi 		if (!c)
1139d53690fSGabriel Krisman Bertazi 			return nlen;
1149d53690fSGabriel Krisman Bertazi 		if (c == -1)
1159d53690fSGabriel Krisman Bertazi 			break;
1169d53690fSGabriel Krisman Bertazi 	}
1179d53690fSGabriel Krisman Bertazi 	return -EINVAL;
1189d53690fSGabriel Krisman Bertazi }
1199d53690fSGabriel Krisman Bertazi EXPORT_SYMBOL(utf8_casefold);
1209d53690fSGabriel Krisman Bertazi 
1213d7bfea8SDaniel Rosenberg int utf8_casefold_hash(const struct unicode_map *um, const void *salt,
1223d7bfea8SDaniel Rosenberg 		       struct qstr *str)
1233d7bfea8SDaniel Rosenberg {
1243d7bfea8SDaniel Rosenberg 	struct utf8cursor cur;
1253d7bfea8SDaniel Rosenberg 	int c;
1263d7bfea8SDaniel Rosenberg 	unsigned long hash = init_name_hash(salt);
1273d7bfea8SDaniel Rosenberg 
128*6ca99ce7SChristoph Hellwig 	if (utf8ncursor(&cur, um, UTF8_NFDICF, str->name, str->len) < 0)
1293d7bfea8SDaniel Rosenberg 		return -EINVAL;
1303d7bfea8SDaniel Rosenberg 
1313d7bfea8SDaniel Rosenberg 	while ((c = utf8byte(&cur))) {
1323d7bfea8SDaniel Rosenberg 		if (c < 0)
1333d7bfea8SDaniel Rosenberg 			return -EINVAL;
1343d7bfea8SDaniel Rosenberg 		hash = partial_name_hash((unsigned char)c, hash);
1353d7bfea8SDaniel Rosenberg 	}
1363d7bfea8SDaniel Rosenberg 	str->hash = end_name_hash(hash);
1373d7bfea8SDaniel Rosenberg 	return 0;
1383d7bfea8SDaniel Rosenberg }
1393d7bfea8SDaniel Rosenberg EXPORT_SYMBOL(utf8_casefold_hash);
1403d7bfea8SDaniel Rosenberg 
1419d53690fSGabriel Krisman Bertazi int utf8_normalize(const struct unicode_map *um, const struct qstr *str,
1429d53690fSGabriel Krisman Bertazi 		   unsigned char *dest, size_t dlen)
1439d53690fSGabriel Krisman Bertazi {
1449d53690fSGabriel Krisman Bertazi 	struct utf8cursor cur;
1459d53690fSGabriel Krisman Bertazi 	ssize_t nlen = 0;
1469d53690fSGabriel Krisman Bertazi 
147*6ca99ce7SChristoph Hellwig 	if (utf8ncursor(&cur, um, UTF8_NFDI, str->name, str->len) < 0)
1489d53690fSGabriel Krisman Bertazi 		return -EINVAL;
1499d53690fSGabriel Krisman Bertazi 
1509d53690fSGabriel Krisman Bertazi 	for (nlen = 0; nlen < dlen; nlen++) {
1519d53690fSGabriel Krisman Bertazi 		int c = utf8byte(&cur);
1529d53690fSGabriel Krisman Bertazi 
1539d53690fSGabriel Krisman Bertazi 		dest[nlen] = c;
1549d53690fSGabriel Krisman Bertazi 		if (!c)
1559d53690fSGabriel Krisman Bertazi 			return nlen;
1569d53690fSGabriel Krisman Bertazi 		if (c == -1)
1579d53690fSGabriel Krisman Bertazi 			break;
1589d53690fSGabriel Krisman Bertazi 	}
1599d53690fSGabriel Krisman Bertazi 	return -EINVAL;
1609d53690fSGabriel Krisman Bertazi }
1619d53690fSGabriel Krisman Bertazi EXPORT_SYMBOL(utf8_normalize);
1629d53690fSGabriel Krisman Bertazi 
16349bd03ccSChristoph Hellwig struct unicode_map *utf8_load(unsigned int version)
1649d53690fSGabriel Krisman Bertazi {
16549bd03ccSChristoph Hellwig 	struct unicode_map *um;
1669d53690fSGabriel Krisman Bertazi 
16749bd03ccSChristoph Hellwig 	if (!utf8version_is_supported(version))
1689d53690fSGabriel Krisman Bertazi 		return ERR_PTR(-EINVAL);
1699d53690fSGabriel Krisman Bertazi 
1709d53690fSGabriel Krisman Bertazi 	um = kzalloc(sizeof(struct unicode_map), GFP_KERNEL);
1719d53690fSGabriel Krisman Bertazi 	if (!um)
1729d53690fSGabriel Krisman Bertazi 		return ERR_PTR(-ENOMEM);
17349bd03ccSChristoph Hellwig 	um->version = version;
174*6ca99ce7SChristoph Hellwig 	um->ntab[UTF8_NFDI] = utf8nfdi(version);
175*6ca99ce7SChristoph Hellwig 	if (!um->ntab[UTF8_NFDI])
176*6ca99ce7SChristoph Hellwig 		goto out_free_um;
177*6ca99ce7SChristoph Hellwig 	um->ntab[UTF8_NFDICF] = utf8nfdicf(version);
178*6ca99ce7SChristoph Hellwig 	if (!um->ntab[UTF8_NFDICF])
179*6ca99ce7SChristoph Hellwig 		goto out_free_um;
1809d53690fSGabriel Krisman Bertazi 	return um;
181*6ca99ce7SChristoph Hellwig 
182*6ca99ce7SChristoph Hellwig out_free_um:
183*6ca99ce7SChristoph Hellwig 	kfree(um);
184*6ca99ce7SChristoph Hellwig 	return ERR_PTR(-EINVAL);
1859d53690fSGabriel Krisman Bertazi }
1869d53690fSGabriel Krisman Bertazi EXPORT_SYMBOL(utf8_load);
1879d53690fSGabriel Krisman Bertazi 
/**
 * utf8_unload - release a unicode map
 * @um: map to release, as allocated by utf8_load()
 *
 * Hands @um straight to kfree(); nothing else is torn down here.
 */
void utf8_unload(struct unicode_map *um)
{
	kfree(um);
}
EXPORT_SYMBOL(utf8_unload);
1939d53690fSGabriel Krisman Bertazi 
1949d53690fSGabriel Krisman Bertazi MODULE_LICENSE("GPL v2");
195