xref: /openbmc/linux/fs/unicode/utf8-core.c (revision 1a59d1b8)
1 /* SPDX-License-Identifier: GPL-2.0 */
2 #include <linux/module.h>
3 #include <linux/kernel.h>
4 #include <linux/string.h>
5 #include <linux/slab.h>
6 #include <linux/parser.h>
7 #include <linux/errno.h>
8 #include <linux/unicode.h>
9 
10 #include "utf8n.h"
11 
12 int utf8_validate(const struct unicode_map *um, const struct qstr *str)
13 {
14 	const struct utf8data *data = utf8nfdi(um->version);
15 
16 	if (utf8nlen(data, str->name, str->len) < 0)
17 		return -1;
18 	return 0;
19 }
20 EXPORT_SYMBOL(utf8_validate);
21 
22 int utf8_strncmp(const struct unicode_map *um,
23 		 const struct qstr *s1, const struct qstr *s2)
24 {
25 	const struct utf8data *data = utf8nfdi(um->version);
26 	struct utf8cursor cur1, cur2;
27 	int c1, c2;
28 
29 	if (utf8ncursor(&cur1, data, s1->name, s1->len) < 0)
30 		return -EINVAL;
31 
32 	if (utf8ncursor(&cur2, data, s2->name, s2->len) < 0)
33 		return -EINVAL;
34 
35 	do {
36 		c1 = utf8byte(&cur1);
37 		c2 = utf8byte(&cur2);
38 
39 		if (c1 < 0 || c2 < 0)
40 			return -EINVAL;
41 		if (c1 != c2)
42 			return 1;
43 	} while (c1);
44 
45 	return 0;
46 }
47 EXPORT_SYMBOL(utf8_strncmp);
48 
49 int utf8_strncasecmp(const struct unicode_map *um,
50 		     const struct qstr *s1, const struct qstr *s2)
51 {
52 	const struct utf8data *data = utf8nfdicf(um->version);
53 	struct utf8cursor cur1, cur2;
54 	int c1, c2;
55 
56 	if (utf8ncursor(&cur1, data, s1->name, s1->len) < 0)
57 		return -EINVAL;
58 
59 	if (utf8ncursor(&cur2, data, s2->name, s2->len) < 0)
60 		return -EINVAL;
61 
62 	do {
63 		c1 = utf8byte(&cur1);
64 		c2 = utf8byte(&cur2);
65 
66 		if (c1 < 0 || c2 < 0)
67 			return -EINVAL;
68 		if (c1 != c2)
69 			return 1;
70 	} while (c1);
71 
72 	return 0;
73 }
74 EXPORT_SYMBOL(utf8_strncasecmp);
75 
76 int utf8_casefold(const struct unicode_map *um, const struct qstr *str,
77 		  unsigned char *dest, size_t dlen)
78 {
79 	const struct utf8data *data = utf8nfdicf(um->version);
80 	struct utf8cursor cur;
81 	size_t nlen = 0;
82 
83 	if (utf8ncursor(&cur, data, str->name, str->len) < 0)
84 		return -EINVAL;
85 
86 	for (nlen = 0; nlen < dlen; nlen++) {
87 		int c = utf8byte(&cur);
88 
89 		dest[nlen] = c;
90 		if (!c)
91 			return nlen;
92 		if (c == -1)
93 			break;
94 	}
95 	return -EINVAL;
96 }
97 
98 EXPORT_SYMBOL(utf8_casefold);
99 
100 int utf8_normalize(const struct unicode_map *um, const struct qstr *str,
101 		   unsigned char *dest, size_t dlen)
102 {
103 	const struct utf8data *data = utf8nfdi(um->version);
104 	struct utf8cursor cur;
105 	ssize_t nlen = 0;
106 
107 	if (utf8ncursor(&cur, data, str->name, str->len) < 0)
108 		return -EINVAL;
109 
110 	for (nlen = 0; nlen < dlen; nlen++) {
111 		int c = utf8byte(&cur);
112 
113 		dest[nlen] = c;
114 		if (!c)
115 			return nlen;
116 		if (c == -1)
117 			break;
118 	}
119 	return -EINVAL;
120 }
121 
122 EXPORT_SYMBOL(utf8_normalize);
123 
124 static int utf8_parse_version(const char *version, unsigned int *maj,
125 			      unsigned int *min, unsigned int *rev)
126 {
127 	substring_t args[3];
128 	char version_string[12];
129 	const struct match_token token[] = {
130 		{1, "%d.%d.%d"},
131 		{0, NULL}
132 	};
133 
134 	strncpy(version_string, version, sizeof(version_string));
135 
136 	if (match_token(version_string, token, args) != 1)
137 		return -EINVAL;
138 
139 	if (match_int(&args[0], maj) || match_int(&args[1], min) ||
140 	    match_int(&args[2], rev))
141 		return -EINVAL;
142 
143 	return 0;
144 }
145 
146 struct unicode_map *utf8_load(const char *version)
147 {
148 	struct unicode_map *um = NULL;
149 	int unicode_version;
150 
151 	if (version) {
152 		unsigned int maj, min, rev;
153 
154 		if (utf8_parse_version(version, &maj, &min, &rev) < 0)
155 			return ERR_PTR(-EINVAL);
156 
157 		if (!utf8version_is_supported(maj, min, rev))
158 			return ERR_PTR(-EINVAL);
159 
160 		unicode_version = UNICODE_AGE(maj, min, rev);
161 	} else {
162 		unicode_version = utf8version_latest();
163 		printk(KERN_WARNING"UTF-8 version not specified. "
164 		       "Assuming latest supported version (%d.%d.%d).",
165 		       (unicode_version >> 16) & 0xff,
166 		       (unicode_version >> 8) & 0xff,
167 		       (unicode_version & 0xff));
168 	}
169 
170 	um = kzalloc(sizeof(struct unicode_map), GFP_KERNEL);
171 	if (!um)
172 		return ERR_PTR(-ENOMEM);
173 
174 	um->charset = "UTF-8";
175 	um->version = unicode_version;
176 
177 	return um;
178 }
179 EXPORT_SYMBOL(utf8_load);
180 
/**
 * utf8_unload - release a unicode map
 * @um: map to free; passed straight to kfree()
 */
void utf8_unload(struct unicode_map *um)
{
	kfree(um);
}
EXPORT_SYMBOL(utf8_unload);
186 
187 MODULE_LICENSE("GPL v2");
188