xref: /openbmc/linux/fs/unicode/utf8-core.c (revision 3ae72562)
19d53690fSGabriel Krisman Bertazi /* SPDX-License-Identifier: GPL-2.0 */
29d53690fSGabriel Krisman Bertazi #include <linux/module.h>
39d53690fSGabriel Krisman Bertazi #include <linux/kernel.h>
49d53690fSGabriel Krisman Bertazi #include <linux/string.h>
59d53690fSGabriel Krisman Bertazi #include <linux/slab.h>
69d53690fSGabriel Krisman Bertazi #include <linux/parser.h>
79d53690fSGabriel Krisman Bertazi #include <linux/errno.h>
89d53690fSGabriel Krisman Bertazi #include <linux/unicode.h>
99d53690fSGabriel Krisman Bertazi 
109d53690fSGabriel Krisman Bertazi #include "utf8n.h"
119d53690fSGabriel Krisman Bertazi 
129d53690fSGabriel Krisman Bertazi int utf8_validate(const struct unicode_map *um, const struct qstr *str)
139d53690fSGabriel Krisman Bertazi {
149d53690fSGabriel Krisman Bertazi 	const struct utf8data *data = utf8nfdi(um->version);
159d53690fSGabriel Krisman Bertazi 
169d53690fSGabriel Krisman Bertazi 	if (utf8nlen(data, str->name, str->len) < 0)
179d53690fSGabriel Krisman Bertazi 		return -1;
189d53690fSGabriel Krisman Bertazi 	return 0;
199d53690fSGabriel Krisman Bertazi }
209d53690fSGabriel Krisman Bertazi EXPORT_SYMBOL(utf8_validate);
219d53690fSGabriel Krisman Bertazi 
229d53690fSGabriel Krisman Bertazi int utf8_strncmp(const struct unicode_map *um,
239d53690fSGabriel Krisman Bertazi 		 const struct qstr *s1, const struct qstr *s2)
249d53690fSGabriel Krisman Bertazi {
259d53690fSGabriel Krisman Bertazi 	const struct utf8data *data = utf8nfdi(um->version);
269d53690fSGabriel Krisman Bertazi 	struct utf8cursor cur1, cur2;
279d53690fSGabriel Krisman Bertazi 	int c1, c2;
289d53690fSGabriel Krisman Bertazi 
299d53690fSGabriel Krisman Bertazi 	if (utf8ncursor(&cur1, data, s1->name, s1->len) < 0)
309d53690fSGabriel Krisman Bertazi 		return -EINVAL;
319d53690fSGabriel Krisman Bertazi 
329d53690fSGabriel Krisman Bertazi 	if (utf8ncursor(&cur2, data, s2->name, s2->len) < 0)
339d53690fSGabriel Krisman Bertazi 		return -EINVAL;
349d53690fSGabriel Krisman Bertazi 
359d53690fSGabriel Krisman Bertazi 	do {
369d53690fSGabriel Krisman Bertazi 		c1 = utf8byte(&cur1);
379d53690fSGabriel Krisman Bertazi 		c2 = utf8byte(&cur2);
389d53690fSGabriel Krisman Bertazi 
399d53690fSGabriel Krisman Bertazi 		if (c1 < 0 || c2 < 0)
409d53690fSGabriel Krisman Bertazi 			return -EINVAL;
419d53690fSGabriel Krisman Bertazi 		if (c1 != c2)
429d53690fSGabriel Krisman Bertazi 			return 1;
439d53690fSGabriel Krisman Bertazi 	} while (c1);
449d53690fSGabriel Krisman Bertazi 
459d53690fSGabriel Krisman Bertazi 	return 0;
469d53690fSGabriel Krisman Bertazi }
479d53690fSGabriel Krisman Bertazi EXPORT_SYMBOL(utf8_strncmp);
489d53690fSGabriel Krisman Bertazi 
499d53690fSGabriel Krisman Bertazi int utf8_strncasecmp(const struct unicode_map *um,
509d53690fSGabriel Krisman Bertazi 		     const struct qstr *s1, const struct qstr *s2)
519d53690fSGabriel Krisman Bertazi {
529d53690fSGabriel Krisman Bertazi 	const struct utf8data *data = utf8nfdicf(um->version);
539d53690fSGabriel Krisman Bertazi 	struct utf8cursor cur1, cur2;
549d53690fSGabriel Krisman Bertazi 	int c1, c2;
559d53690fSGabriel Krisman Bertazi 
569d53690fSGabriel Krisman Bertazi 	if (utf8ncursor(&cur1, data, s1->name, s1->len) < 0)
579d53690fSGabriel Krisman Bertazi 		return -EINVAL;
589d53690fSGabriel Krisman Bertazi 
599d53690fSGabriel Krisman Bertazi 	if (utf8ncursor(&cur2, data, s2->name, s2->len) < 0)
609d53690fSGabriel Krisman Bertazi 		return -EINVAL;
619d53690fSGabriel Krisman Bertazi 
629d53690fSGabriel Krisman Bertazi 	do {
639d53690fSGabriel Krisman Bertazi 		c1 = utf8byte(&cur1);
649d53690fSGabriel Krisman Bertazi 		c2 = utf8byte(&cur2);
659d53690fSGabriel Krisman Bertazi 
669d53690fSGabriel Krisman Bertazi 		if (c1 < 0 || c2 < 0)
679d53690fSGabriel Krisman Bertazi 			return -EINVAL;
689d53690fSGabriel Krisman Bertazi 		if (c1 != c2)
699d53690fSGabriel Krisman Bertazi 			return 1;
709d53690fSGabriel Krisman Bertazi 	} while (c1);
719d53690fSGabriel Krisman Bertazi 
729d53690fSGabriel Krisman Bertazi 	return 0;
739d53690fSGabriel Krisman Bertazi }
749d53690fSGabriel Krisman Bertazi EXPORT_SYMBOL(utf8_strncasecmp);
759d53690fSGabriel Krisman Bertazi 
763ae72562SGabriel Krisman Bertazi /* String cf is expected to be a valid UTF-8 casefolded
773ae72562SGabriel Krisman Bertazi  * string.
783ae72562SGabriel Krisman Bertazi  */
793ae72562SGabriel Krisman Bertazi int utf8_strncasecmp_folded(const struct unicode_map *um,
803ae72562SGabriel Krisman Bertazi 			    const struct qstr *cf,
813ae72562SGabriel Krisman Bertazi 			    const struct qstr *s1)
823ae72562SGabriel Krisman Bertazi {
833ae72562SGabriel Krisman Bertazi 	const struct utf8data *data = utf8nfdicf(um->version);
843ae72562SGabriel Krisman Bertazi 	struct utf8cursor cur1;
853ae72562SGabriel Krisman Bertazi 	int c1, c2;
863ae72562SGabriel Krisman Bertazi 	int i = 0;
873ae72562SGabriel Krisman Bertazi 
883ae72562SGabriel Krisman Bertazi 	if (utf8ncursor(&cur1, data, s1->name, s1->len) < 0)
893ae72562SGabriel Krisman Bertazi 		return -EINVAL;
903ae72562SGabriel Krisman Bertazi 
913ae72562SGabriel Krisman Bertazi 	do {
923ae72562SGabriel Krisman Bertazi 		c1 = utf8byte(&cur1);
933ae72562SGabriel Krisman Bertazi 		c2 = cf->name[i++];
943ae72562SGabriel Krisman Bertazi 		if (c1 < 0)
953ae72562SGabriel Krisman Bertazi 			return -EINVAL;
963ae72562SGabriel Krisman Bertazi 		if (c1 != c2)
973ae72562SGabriel Krisman Bertazi 			return 1;
983ae72562SGabriel Krisman Bertazi 	} while (c1);
993ae72562SGabriel Krisman Bertazi 
1003ae72562SGabriel Krisman Bertazi 	return 0;
1013ae72562SGabriel Krisman Bertazi }
1023ae72562SGabriel Krisman Bertazi EXPORT_SYMBOL(utf8_strncasecmp_folded);
1033ae72562SGabriel Krisman Bertazi 
1049d53690fSGabriel Krisman Bertazi int utf8_casefold(const struct unicode_map *um, const struct qstr *str,
1059d53690fSGabriel Krisman Bertazi 		  unsigned char *dest, size_t dlen)
1069d53690fSGabriel Krisman Bertazi {
1079d53690fSGabriel Krisman Bertazi 	const struct utf8data *data = utf8nfdicf(um->version);
1089d53690fSGabriel Krisman Bertazi 	struct utf8cursor cur;
1099d53690fSGabriel Krisman Bertazi 	size_t nlen = 0;
1109d53690fSGabriel Krisman Bertazi 
1119d53690fSGabriel Krisman Bertazi 	if (utf8ncursor(&cur, data, str->name, str->len) < 0)
1129d53690fSGabriel Krisman Bertazi 		return -EINVAL;
1139d53690fSGabriel Krisman Bertazi 
1149d53690fSGabriel Krisman Bertazi 	for (nlen = 0; nlen < dlen; nlen++) {
1159d53690fSGabriel Krisman Bertazi 		int c = utf8byte(&cur);
1169d53690fSGabriel Krisman Bertazi 
1179d53690fSGabriel Krisman Bertazi 		dest[nlen] = c;
1189d53690fSGabriel Krisman Bertazi 		if (!c)
1199d53690fSGabriel Krisman Bertazi 			return nlen;
1209d53690fSGabriel Krisman Bertazi 		if (c == -1)
1219d53690fSGabriel Krisman Bertazi 			break;
1229d53690fSGabriel Krisman Bertazi 	}
1239d53690fSGabriel Krisman Bertazi 	return -EINVAL;
1249d53690fSGabriel Krisman Bertazi }
1259d53690fSGabriel Krisman Bertazi 
1269d53690fSGabriel Krisman Bertazi EXPORT_SYMBOL(utf8_casefold);
1279d53690fSGabriel Krisman Bertazi 
1289d53690fSGabriel Krisman Bertazi int utf8_normalize(const struct unicode_map *um, const struct qstr *str,
1299d53690fSGabriel Krisman Bertazi 		   unsigned char *dest, size_t dlen)
1309d53690fSGabriel Krisman Bertazi {
1319d53690fSGabriel Krisman Bertazi 	const struct utf8data *data = utf8nfdi(um->version);
1329d53690fSGabriel Krisman Bertazi 	struct utf8cursor cur;
1339d53690fSGabriel Krisman Bertazi 	ssize_t nlen = 0;
1349d53690fSGabriel Krisman Bertazi 
1359d53690fSGabriel Krisman Bertazi 	if (utf8ncursor(&cur, data, str->name, str->len) < 0)
1369d53690fSGabriel Krisman Bertazi 		return -EINVAL;
1379d53690fSGabriel Krisman Bertazi 
1389d53690fSGabriel Krisman Bertazi 	for (nlen = 0; nlen < dlen; nlen++) {
1399d53690fSGabriel Krisman Bertazi 		int c = utf8byte(&cur);
1409d53690fSGabriel Krisman Bertazi 
1419d53690fSGabriel Krisman Bertazi 		dest[nlen] = c;
1429d53690fSGabriel Krisman Bertazi 		if (!c)
1439d53690fSGabriel Krisman Bertazi 			return nlen;
1449d53690fSGabriel Krisman Bertazi 		if (c == -1)
1459d53690fSGabriel Krisman Bertazi 			break;
1469d53690fSGabriel Krisman Bertazi 	}
1479d53690fSGabriel Krisman Bertazi 	return -EINVAL;
1489d53690fSGabriel Krisman Bertazi }
1499d53690fSGabriel Krisman Bertazi 
1509d53690fSGabriel Krisman Bertazi EXPORT_SYMBOL(utf8_normalize);
1519d53690fSGabriel Krisman Bertazi 
1529d53690fSGabriel Krisman Bertazi static int utf8_parse_version(const char *version, unsigned int *maj,
1539d53690fSGabriel Krisman Bertazi 			      unsigned int *min, unsigned int *rev)
1549d53690fSGabriel Krisman Bertazi {
1559d53690fSGabriel Krisman Bertazi 	substring_t args[3];
1569d53690fSGabriel Krisman Bertazi 	char version_string[12];
1579d53690fSGabriel Krisman Bertazi 	const struct match_token token[] = {
1589d53690fSGabriel Krisman Bertazi 		{1, "%d.%d.%d"},
1599d53690fSGabriel Krisman Bertazi 		{0, NULL}
1609d53690fSGabriel Krisman Bertazi 	};
1619d53690fSGabriel Krisman Bertazi 
1629d53690fSGabriel Krisman Bertazi 	strncpy(version_string, version, sizeof(version_string));
1639d53690fSGabriel Krisman Bertazi 
1649d53690fSGabriel Krisman Bertazi 	if (match_token(version_string, token, args) != 1)
1659d53690fSGabriel Krisman Bertazi 		return -EINVAL;
1669d53690fSGabriel Krisman Bertazi 
1679d53690fSGabriel Krisman Bertazi 	if (match_int(&args[0], maj) || match_int(&args[1], min) ||
1689d53690fSGabriel Krisman Bertazi 	    match_int(&args[2], rev))
1699d53690fSGabriel Krisman Bertazi 		return -EINVAL;
1709d53690fSGabriel Krisman Bertazi 
1719d53690fSGabriel Krisman Bertazi 	return 0;
1729d53690fSGabriel Krisman Bertazi }
1739d53690fSGabriel Krisman Bertazi 
1749d53690fSGabriel Krisman Bertazi struct unicode_map *utf8_load(const char *version)
1759d53690fSGabriel Krisman Bertazi {
1769d53690fSGabriel Krisman Bertazi 	struct unicode_map *um = NULL;
1779d53690fSGabriel Krisman Bertazi 	int unicode_version;
1789d53690fSGabriel Krisman Bertazi 
1799d53690fSGabriel Krisman Bertazi 	if (version) {
1809d53690fSGabriel Krisman Bertazi 		unsigned int maj, min, rev;
1819d53690fSGabriel Krisman Bertazi 
1829d53690fSGabriel Krisman Bertazi 		if (utf8_parse_version(version, &maj, &min, &rev) < 0)
1839d53690fSGabriel Krisman Bertazi 			return ERR_PTR(-EINVAL);
1849d53690fSGabriel Krisman Bertazi 
1859d53690fSGabriel Krisman Bertazi 		if (!utf8version_is_supported(maj, min, rev))
1869d53690fSGabriel Krisman Bertazi 			return ERR_PTR(-EINVAL);
1879d53690fSGabriel Krisman Bertazi 
1889d53690fSGabriel Krisman Bertazi 		unicode_version = UNICODE_AGE(maj, min, rev);
1899d53690fSGabriel Krisman Bertazi 	} else {
1909d53690fSGabriel Krisman Bertazi 		unicode_version = utf8version_latest();
1919d53690fSGabriel Krisman Bertazi 		printk(KERN_WARNING"UTF-8 version not specified. "
1929d53690fSGabriel Krisman Bertazi 		       "Assuming latest supported version (%d.%d.%d).",
1939d53690fSGabriel Krisman Bertazi 		       (unicode_version >> 16) & 0xff,
1949d53690fSGabriel Krisman Bertazi 		       (unicode_version >> 8) & 0xff,
1959d53690fSGabriel Krisman Bertazi 		       (unicode_version & 0xff));
1969d53690fSGabriel Krisman Bertazi 	}
1979d53690fSGabriel Krisman Bertazi 
1989d53690fSGabriel Krisman Bertazi 	um = kzalloc(sizeof(struct unicode_map), GFP_KERNEL);
1999d53690fSGabriel Krisman Bertazi 	if (!um)
2009d53690fSGabriel Krisman Bertazi 		return ERR_PTR(-ENOMEM);
2019d53690fSGabriel Krisman Bertazi 
2029d53690fSGabriel Krisman Bertazi 	um->charset = "UTF-8";
2039d53690fSGabriel Krisman Bertazi 	um->version = unicode_version;
2049d53690fSGabriel Krisman Bertazi 
2059d53690fSGabriel Krisman Bertazi 	return um;
2069d53690fSGabriel Krisman Bertazi }
2079d53690fSGabriel Krisman Bertazi EXPORT_SYMBOL(utf8_load);
2089d53690fSGabriel Krisman Bertazi 
/*
 * utf8_unload - release a unicode_map
 * @um: map to release; it is passed straight to kfree()
 */
void utf8_unload(struct unicode_map *um)
{
	kfree(um);
}
EXPORT_SYMBOL(utf8_unload);
2149d53690fSGabriel Krisman Bertazi 
2159d53690fSGabriel Krisman Bertazi MODULE_LICENSE("GPL v2");
216