xref: /openbmc/linux/fs/unicode/utf8-core.c (revision 9d53690f0d4e5686e80f034ea584b7a822b356d3)
1*9d53690fSGabriel Krisman Bertazi /* SPDX-License-Identifier: GPL-2.0 */
2*9d53690fSGabriel Krisman Bertazi #include <linux/module.h>
3*9d53690fSGabriel Krisman Bertazi #include <linux/kernel.h>
4*9d53690fSGabriel Krisman Bertazi #include <linux/string.h>
5*9d53690fSGabriel Krisman Bertazi #include <linux/slab.h>
6*9d53690fSGabriel Krisman Bertazi #include <linux/parser.h>
7*9d53690fSGabriel Krisman Bertazi #include <linux/errno.h>
8*9d53690fSGabriel Krisman Bertazi #include <linux/unicode.h>
9*9d53690fSGabriel Krisman Bertazi 
10*9d53690fSGabriel Krisman Bertazi #include "utf8n.h"
11*9d53690fSGabriel Krisman Bertazi 
12*9d53690fSGabriel Krisman Bertazi int utf8_validate(const struct unicode_map *um, const struct qstr *str)
13*9d53690fSGabriel Krisman Bertazi {
14*9d53690fSGabriel Krisman Bertazi 	const struct utf8data *data = utf8nfdi(um->version);
15*9d53690fSGabriel Krisman Bertazi 
16*9d53690fSGabriel Krisman Bertazi 	if (utf8nlen(data, str->name, str->len) < 0)
17*9d53690fSGabriel Krisman Bertazi 		return -1;
18*9d53690fSGabriel Krisman Bertazi 	return 0;
19*9d53690fSGabriel Krisman Bertazi }
20*9d53690fSGabriel Krisman Bertazi EXPORT_SYMBOL(utf8_validate);
21*9d53690fSGabriel Krisman Bertazi 
22*9d53690fSGabriel Krisman Bertazi int utf8_strncmp(const struct unicode_map *um,
23*9d53690fSGabriel Krisman Bertazi 		 const struct qstr *s1, const struct qstr *s2)
24*9d53690fSGabriel Krisman Bertazi {
25*9d53690fSGabriel Krisman Bertazi 	const struct utf8data *data = utf8nfdi(um->version);
26*9d53690fSGabriel Krisman Bertazi 	struct utf8cursor cur1, cur2;
27*9d53690fSGabriel Krisman Bertazi 	int c1, c2;
28*9d53690fSGabriel Krisman Bertazi 
29*9d53690fSGabriel Krisman Bertazi 	if (utf8ncursor(&cur1, data, s1->name, s1->len) < 0)
30*9d53690fSGabriel Krisman Bertazi 		return -EINVAL;
31*9d53690fSGabriel Krisman Bertazi 
32*9d53690fSGabriel Krisman Bertazi 	if (utf8ncursor(&cur2, data, s2->name, s2->len) < 0)
33*9d53690fSGabriel Krisman Bertazi 		return -EINVAL;
34*9d53690fSGabriel Krisman Bertazi 
35*9d53690fSGabriel Krisman Bertazi 	do {
36*9d53690fSGabriel Krisman Bertazi 		c1 = utf8byte(&cur1);
37*9d53690fSGabriel Krisman Bertazi 		c2 = utf8byte(&cur2);
38*9d53690fSGabriel Krisman Bertazi 
39*9d53690fSGabriel Krisman Bertazi 		if (c1 < 0 || c2 < 0)
40*9d53690fSGabriel Krisman Bertazi 			return -EINVAL;
41*9d53690fSGabriel Krisman Bertazi 		if (c1 != c2)
42*9d53690fSGabriel Krisman Bertazi 			return 1;
43*9d53690fSGabriel Krisman Bertazi 	} while (c1);
44*9d53690fSGabriel Krisman Bertazi 
45*9d53690fSGabriel Krisman Bertazi 	return 0;
46*9d53690fSGabriel Krisman Bertazi }
47*9d53690fSGabriel Krisman Bertazi EXPORT_SYMBOL(utf8_strncmp);
48*9d53690fSGabriel Krisman Bertazi 
49*9d53690fSGabriel Krisman Bertazi int utf8_strncasecmp(const struct unicode_map *um,
50*9d53690fSGabriel Krisman Bertazi 		     const struct qstr *s1, const struct qstr *s2)
51*9d53690fSGabriel Krisman Bertazi {
52*9d53690fSGabriel Krisman Bertazi 	const struct utf8data *data = utf8nfdicf(um->version);
53*9d53690fSGabriel Krisman Bertazi 	struct utf8cursor cur1, cur2;
54*9d53690fSGabriel Krisman Bertazi 	int c1, c2;
55*9d53690fSGabriel Krisman Bertazi 
56*9d53690fSGabriel Krisman Bertazi 	if (utf8ncursor(&cur1, data, s1->name, s1->len) < 0)
57*9d53690fSGabriel Krisman Bertazi 		return -EINVAL;
58*9d53690fSGabriel Krisman Bertazi 
59*9d53690fSGabriel Krisman Bertazi 	if (utf8ncursor(&cur2, data, s2->name, s2->len) < 0)
60*9d53690fSGabriel Krisman Bertazi 		return -EINVAL;
61*9d53690fSGabriel Krisman Bertazi 
62*9d53690fSGabriel Krisman Bertazi 	do {
63*9d53690fSGabriel Krisman Bertazi 		c1 = utf8byte(&cur1);
64*9d53690fSGabriel Krisman Bertazi 		c2 = utf8byte(&cur2);
65*9d53690fSGabriel Krisman Bertazi 
66*9d53690fSGabriel Krisman Bertazi 		if (c1 < 0 || c2 < 0)
67*9d53690fSGabriel Krisman Bertazi 			return -EINVAL;
68*9d53690fSGabriel Krisman Bertazi 		if (c1 != c2)
69*9d53690fSGabriel Krisman Bertazi 			return 1;
70*9d53690fSGabriel Krisman Bertazi 	} while (c1);
71*9d53690fSGabriel Krisman Bertazi 
72*9d53690fSGabriel Krisman Bertazi 	return 0;
73*9d53690fSGabriel Krisman Bertazi }
74*9d53690fSGabriel Krisman Bertazi EXPORT_SYMBOL(utf8_strncasecmp);
75*9d53690fSGabriel Krisman Bertazi 
76*9d53690fSGabriel Krisman Bertazi int utf8_casefold(const struct unicode_map *um, const struct qstr *str,
77*9d53690fSGabriel Krisman Bertazi 		  unsigned char *dest, size_t dlen)
78*9d53690fSGabriel Krisman Bertazi {
79*9d53690fSGabriel Krisman Bertazi 	const struct utf8data *data = utf8nfdicf(um->version);
80*9d53690fSGabriel Krisman Bertazi 	struct utf8cursor cur;
81*9d53690fSGabriel Krisman Bertazi 	size_t nlen = 0;
82*9d53690fSGabriel Krisman Bertazi 
83*9d53690fSGabriel Krisman Bertazi 	if (utf8ncursor(&cur, data, str->name, str->len) < 0)
84*9d53690fSGabriel Krisman Bertazi 		return -EINVAL;
85*9d53690fSGabriel Krisman Bertazi 
86*9d53690fSGabriel Krisman Bertazi 	for (nlen = 0; nlen < dlen; nlen++) {
87*9d53690fSGabriel Krisman Bertazi 		int c = utf8byte(&cur);
88*9d53690fSGabriel Krisman Bertazi 
89*9d53690fSGabriel Krisman Bertazi 		dest[nlen] = c;
90*9d53690fSGabriel Krisman Bertazi 		if (!c)
91*9d53690fSGabriel Krisman Bertazi 			return nlen;
92*9d53690fSGabriel Krisman Bertazi 		if (c == -1)
93*9d53690fSGabriel Krisman Bertazi 			break;
94*9d53690fSGabriel Krisman Bertazi 	}
95*9d53690fSGabriel Krisman Bertazi 	return -EINVAL;
96*9d53690fSGabriel Krisman Bertazi }
97*9d53690fSGabriel Krisman Bertazi 
98*9d53690fSGabriel Krisman Bertazi EXPORT_SYMBOL(utf8_casefold);
99*9d53690fSGabriel Krisman Bertazi 
100*9d53690fSGabriel Krisman Bertazi int utf8_normalize(const struct unicode_map *um, const struct qstr *str,
101*9d53690fSGabriel Krisman Bertazi 		   unsigned char *dest, size_t dlen)
102*9d53690fSGabriel Krisman Bertazi {
103*9d53690fSGabriel Krisman Bertazi 	const struct utf8data *data = utf8nfdi(um->version);
104*9d53690fSGabriel Krisman Bertazi 	struct utf8cursor cur;
105*9d53690fSGabriel Krisman Bertazi 	ssize_t nlen = 0;
106*9d53690fSGabriel Krisman Bertazi 
107*9d53690fSGabriel Krisman Bertazi 	if (utf8ncursor(&cur, data, str->name, str->len) < 0)
108*9d53690fSGabriel Krisman Bertazi 		return -EINVAL;
109*9d53690fSGabriel Krisman Bertazi 
110*9d53690fSGabriel Krisman Bertazi 	for (nlen = 0; nlen < dlen; nlen++) {
111*9d53690fSGabriel Krisman Bertazi 		int c = utf8byte(&cur);
112*9d53690fSGabriel Krisman Bertazi 
113*9d53690fSGabriel Krisman Bertazi 		dest[nlen] = c;
114*9d53690fSGabriel Krisman Bertazi 		if (!c)
115*9d53690fSGabriel Krisman Bertazi 			return nlen;
116*9d53690fSGabriel Krisman Bertazi 		if (c == -1)
117*9d53690fSGabriel Krisman Bertazi 			break;
118*9d53690fSGabriel Krisman Bertazi 	}
119*9d53690fSGabriel Krisman Bertazi 	return -EINVAL;
120*9d53690fSGabriel Krisman Bertazi }
121*9d53690fSGabriel Krisman Bertazi 
122*9d53690fSGabriel Krisman Bertazi EXPORT_SYMBOL(utf8_normalize);
123*9d53690fSGabriel Krisman Bertazi 
124*9d53690fSGabriel Krisman Bertazi static int utf8_parse_version(const char *version, unsigned int *maj,
125*9d53690fSGabriel Krisman Bertazi 			      unsigned int *min, unsigned int *rev)
126*9d53690fSGabriel Krisman Bertazi {
127*9d53690fSGabriel Krisman Bertazi 	substring_t args[3];
128*9d53690fSGabriel Krisman Bertazi 	char version_string[12];
129*9d53690fSGabriel Krisman Bertazi 	const struct match_token token[] = {
130*9d53690fSGabriel Krisman Bertazi 		{1, "%d.%d.%d"},
131*9d53690fSGabriel Krisman Bertazi 		{0, NULL}
132*9d53690fSGabriel Krisman Bertazi 	};
133*9d53690fSGabriel Krisman Bertazi 
134*9d53690fSGabriel Krisman Bertazi 	strncpy(version_string, version, sizeof(version_string));
135*9d53690fSGabriel Krisman Bertazi 
136*9d53690fSGabriel Krisman Bertazi 	if (match_token(version_string, token, args) != 1)
137*9d53690fSGabriel Krisman Bertazi 		return -EINVAL;
138*9d53690fSGabriel Krisman Bertazi 
139*9d53690fSGabriel Krisman Bertazi 	if (match_int(&args[0], maj) || match_int(&args[1], min) ||
140*9d53690fSGabriel Krisman Bertazi 	    match_int(&args[2], rev))
141*9d53690fSGabriel Krisman Bertazi 		return -EINVAL;
142*9d53690fSGabriel Krisman Bertazi 
143*9d53690fSGabriel Krisman Bertazi 	return 0;
144*9d53690fSGabriel Krisman Bertazi }
145*9d53690fSGabriel Krisman Bertazi 
146*9d53690fSGabriel Krisman Bertazi struct unicode_map *utf8_load(const char *version)
147*9d53690fSGabriel Krisman Bertazi {
148*9d53690fSGabriel Krisman Bertazi 	struct unicode_map *um = NULL;
149*9d53690fSGabriel Krisman Bertazi 	int unicode_version;
150*9d53690fSGabriel Krisman Bertazi 
151*9d53690fSGabriel Krisman Bertazi 	if (version) {
152*9d53690fSGabriel Krisman Bertazi 		unsigned int maj, min, rev;
153*9d53690fSGabriel Krisman Bertazi 
154*9d53690fSGabriel Krisman Bertazi 		if (utf8_parse_version(version, &maj, &min, &rev) < 0)
155*9d53690fSGabriel Krisman Bertazi 			return ERR_PTR(-EINVAL);
156*9d53690fSGabriel Krisman Bertazi 
157*9d53690fSGabriel Krisman Bertazi 		if (!utf8version_is_supported(maj, min, rev))
158*9d53690fSGabriel Krisman Bertazi 			return ERR_PTR(-EINVAL);
159*9d53690fSGabriel Krisman Bertazi 
160*9d53690fSGabriel Krisman Bertazi 		unicode_version = UNICODE_AGE(maj, min, rev);
161*9d53690fSGabriel Krisman Bertazi 	} else {
162*9d53690fSGabriel Krisman Bertazi 		unicode_version = utf8version_latest();
163*9d53690fSGabriel Krisman Bertazi 		printk(KERN_WARNING"UTF-8 version not specified. "
164*9d53690fSGabriel Krisman Bertazi 		       "Assuming latest supported version (%d.%d.%d).",
165*9d53690fSGabriel Krisman Bertazi 		       (unicode_version >> 16) & 0xff,
166*9d53690fSGabriel Krisman Bertazi 		       (unicode_version >> 8) & 0xff,
167*9d53690fSGabriel Krisman Bertazi 		       (unicode_version & 0xff));
168*9d53690fSGabriel Krisman Bertazi 	}
169*9d53690fSGabriel Krisman Bertazi 
170*9d53690fSGabriel Krisman Bertazi 	um = kzalloc(sizeof(struct unicode_map), GFP_KERNEL);
171*9d53690fSGabriel Krisman Bertazi 	if (!um)
172*9d53690fSGabriel Krisman Bertazi 		return ERR_PTR(-ENOMEM);
173*9d53690fSGabriel Krisman Bertazi 
174*9d53690fSGabriel Krisman Bertazi 	um->charset = "UTF-8";
175*9d53690fSGabriel Krisman Bertazi 	um->version = unicode_version;
176*9d53690fSGabriel Krisman Bertazi 
177*9d53690fSGabriel Krisman Bertazi 	return um;
178*9d53690fSGabriel Krisman Bertazi }
179*9d53690fSGabriel Krisman Bertazi EXPORT_SYMBOL(utf8_load);
180*9d53690fSGabriel Krisman Bertazi 
/**
 * utf8_unload - release a unicode map
 * @um: map to free
 *
 * Hands @um to kfree(); nothing else is torn down here.
 */
void utf8_unload(struct unicode_map *um)
{
	kfree(um);
}
EXPORT_SYMBOL(utf8_unload);
186*9d53690fSGabriel Krisman Bertazi 
187*9d53690fSGabriel Krisman Bertazi MODULE_LICENSE("GPL v2");
188