// SPDX-License-Identifier: GPL-2.0+
/*
 * Unit tests for Unicode functions
 *
 * Copyright (c) 2018 Heinrich Schuchardt <xypron.glpk@gmx.de>
 */

8f11a164bSHeinrich Schuchardt #include <common.h>
9f11a164bSHeinrich Schuchardt #include <charset.h>
10f11a164bSHeinrich Schuchardt #include <command.h>
11f11a164bSHeinrich Schuchardt #include <errno.h>
12f11a164bSHeinrich Schuchardt #include <test/test.h>
13f11a164bSHeinrich Schuchardt #include <test/suites.h>
14f11a164bSHeinrich Schuchardt #include <test/ut.h>
15f11a164bSHeinrich Schuchardt
/*
 * Linker list entry for a Unicode test: registers _name in the
 * unicode_test list that do_ut_unicode() iterates over.
 */
#define UNICODE_TEST(_name) UNIT_TEST(_name, 0, unicode_test)

19f11a164bSHeinrich Schuchardt /* Constants c1-c4 and d1-d4 encode the same letters */
20f11a164bSHeinrich Schuchardt
21f11a164bSHeinrich Schuchardt /* Six characters translating to one utf-8 byte each. */
22f11a164bSHeinrich Schuchardt static const u16 c1[] = {0x55, 0x2d, 0x42, 0x6f, 0x6f, 0x74, 0x00};
23f11a164bSHeinrich Schuchardt /* One character translating to two utf-8 bytes */
24f11a164bSHeinrich Schuchardt static const u16 c2[] = {0x6b, 0x61, 0x66, 0x62, 0xe1, 0x74, 0x75, 0x72, 0x00};
25f11a164bSHeinrich Schuchardt /* Three characters translating to three utf-8 bytes each */
26f11a164bSHeinrich Schuchardt static const u16 c3[] = {0x6f5c, 0x6c34, 0x8266, 0x00};
27f11a164bSHeinrich Schuchardt /* Three letters translating to four utf-8 bytes each */
28f11a164bSHeinrich Schuchardt static const u16 c4[] = {0xd801, 0xdc8d, 0xd801, 0xdc96, 0xd801, 0xdc87,
29f11a164bSHeinrich Schuchardt 0x0000};
30f11a164bSHeinrich Schuchardt
31f11a164bSHeinrich Schuchardt /* Illegal utf-16 strings */
32f11a164bSHeinrich Schuchardt static const u16 i1[] = {0x69, 0x31, 0xdc87, 0x6c, 0x00};
33f11a164bSHeinrich Schuchardt static const u16 i2[] = {0x69, 0x32, 0xd801, 0xd801, 0x6c, 0x00};
34f11a164bSHeinrich Schuchardt static const u16 i3[] = {0x69, 0x33, 0xd801, 0x00};
35f11a164bSHeinrich Schuchardt
36f11a164bSHeinrich Schuchardt /* Six characters translating to one utf-16 word each. */
37f11a164bSHeinrich Schuchardt static const char d1[] = {0x55, 0x2d, 0x42, 0x6f, 0x6f, 0x74, 0x00};
38f11a164bSHeinrich Schuchardt /* Eight characters translating to one utf-16 word each */
39f11a164bSHeinrich Schuchardt static const char d2[] = {0x6b, 0x61, 0x66, 0x62, 0xc3, 0xa1, 0x74, 0x75,
40f11a164bSHeinrich Schuchardt 0x72, 0x00};
41f11a164bSHeinrich Schuchardt /* Three characters translating to one utf-16 word each */
42f11a164bSHeinrich Schuchardt static const char d3[] = {0xe6, 0xbd, 0x9c, 0xe6, 0xb0, 0xb4, 0xe8, 0x89,
43f11a164bSHeinrich Schuchardt 0xa6, 0x00};
44f11a164bSHeinrich Schuchardt /* Three letters translating to two utf-16 word each */
45f11a164bSHeinrich Schuchardt static const char d4[] = {0xf0, 0x90, 0x92, 0x8d, 0xf0, 0x90, 0x92, 0x96,
46f11a164bSHeinrich Schuchardt 0xf0, 0x90, 0x92, 0x87, 0x00};
47f11a164bSHeinrich Schuchardt
48f11a164bSHeinrich Schuchardt /* Illegal utf-8 strings */
49f11a164bSHeinrich Schuchardt static const char j1[] = {0x6a, 0x31, 0xa1, 0x6c, 0x00};
50f11a164bSHeinrich Schuchardt static const char j2[] = {0x6a, 0x32, 0xc3, 0xc3, 0x6c, 0x00};
51f11a164bSHeinrich Schuchardt static const char j3[] = {0x6a, 0x33, 0xf0, 0x90, 0xf0, 0x00};
52f11a164bSHeinrich Schuchardt
unicode_test_u16_strdup(struct unit_test_state * uts)53*bc19681aSHeinrich Schuchardt static int unicode_test_u16_strdup(struct unit_test_state *uts)
54abb93cb0SHeinrich Schuchardt {
55abb93cb0SHeinrich Schuchardt u16 *copy = u16_strdup(c4);
56abb93cb0SHeinrich Schuchardt
57abb93cb0SHeinrich Schuchardt ut_assert(copy != c4);
58abb93cb0SHeinrich Schuchardt ut_assert(!memcmp(copy, c4, sizeof(c4)));
59abb93cb0SHeinrich Schuchardt free(copy);
60abb93cb0SHeinrich Schuchardt return 0;
61abb93cb0SHeinrich Schuchardt }
62*bc19681aSHeinrich Schuchardt UNICODE_TEST(unicode_test_u16_strdup);
63abb93cb0SHeinrich Schuchardt
unicode_test_u16_strcpy(struct unit_test_state * uts)64*bc19681aSHeinrich Schuchardt static int unicode_test_u16_strcpy(struct unit_test_state *uts)
65abb93cb0SHeinrich Schuchardt {
66abb93cb0SHeinrich Schuchardt u16 *r;
67abb93cb0SHeinrich Schuchardt u16 copy[10];
68abb93cb0SHeinrich Schuchardt
69abb93cb0SHeinrich Schuchardt r = u16_strcpy(copy, c1);
70abb93cb0SHeinrich Schuchardt ut_assert(r == copy);
71abb93cb0SHeinrich Schuchardt ut_assert(!memcmp(copy, c1, sizeof(c1)));
72abb93cb0SHeinrich Schuchardt return 0;
73abb93cb0SHeinrich Schuchardt }
74*bc19681aSHeinrich Schuchardt UNICODE_TEST(unicode_test_u16_strcpy);
75abb93cb0SHeinrich Schuchardt
76fbba2f67SHeinrich Schuchardt /* U-Boot uses UTF-16 strings in the EFI context only. */
77fbba2f67SHeinrich Schuchardt #if CONFIG_IS_ENABLED(EFI_LOADER) && !defined(API_BUILD)
unicode_test_string16(struct unit_test_state * uts)78*bc19681aSHeinrich Schuchardt static int unicode_test_string16(struct unit_test_state *uts)
79fbba2f67SHeinrich Schuchardt {
80fbba2f67SHeinrich Schuchardt char buf[20];
81fbba2f67SHeinrich Schuchardt
82fbba2f67SHeinrich Schuchardt /* Test length and precision */
83fbba2f67SHeinrich Schuchardt memset(buf, 0xff, sizeof(buf));
84fbba2f67SHeinrich Schuchardt sprintf(buf, "%8.6ls", c2);
85fbba2f67SHeinrich Schuchardt ut_asserteq(' ', buf[1]);
86fbba2f67SHeinrich Schuchardt ut_assert(!strncmp(&buf[2], d2, 7));
87fbba2f67SHeinrich Schuchardt ut_assert(!buf[9]);
88fbba2f67SHeinrich Schuchardt
89fbba2f67SHeinrich Schuchardt memset(buf, 0xff, sizeof(buf));
90fbba2f67SHeinrich Schuchardt sprintf(buf, "%8.6ls", c4);
91fbba2f67SHeinrich Schuchardt ut_asserteq(' ', buf[4]);
92fbba2f67SHeinrich Schuchardt ut_assert(!strncmp(&buf[5], d4, 12));
93fbba2f67SHeinrich Schuchardt ut_assert(!buf[17]);
94fbba2f67SHeinrich Schuchardt
95fbba2f67SHeinrich Schuchardt memset(buf, 0xff, sizeof(buf));
96fbba2f67SHeinrich Schuchardt sprintf(buf, "%-8.2ls", c4);
97fbba2f67SHeinrich Schuchardt ut_asserteq(' ', buf[8]);
98fbba2f67SHeinrich Schuchardt ut_assert(!strncmp(buf, d4, 8));
99fbba2f67SHeinrich Schuchardt ut_assert(!buf[14]);
100fbba2f67SHeinrich Schuchardt
101fbba2f67SHeinrich Schuchardt /* Test handling of illegal utf-16 sequences */
102fbba2f67SHeinrich Schuchardt memset(buf, 0xff, sizeof(buf));
103fbba2f67SHeinrich Schuchardt sprintf(buf, "%ls", i1);
104fbba2f67SHeinrich Schuchardt ut_asserteq_str("i1?l", buf);
105fbba2f67SHeinrich Schuchardt
106fbba2f67SHeinrich Schuchardt memset(buf, 0xff, sizeof(buf));
107fbba2f67SHeinrich Schuchardt sprintf(buf, "%ls", i2);
108fbba2f67SHeinrich Schuchardt ut_asserteq_str("i2?l", buf);
109fbba2f67SHeinrich Schuchardt
110fbba2f67SHeinrich Schuchardt memset(buf, 0xff, sizeof(buf));
111fbba2f67SHeinrich Schuchardt sprintf(buf, "%ls", i3);
112fbba2f67SHeinrich Schuchardt ut_asserteq_str("i3?", buf);
113fbba2f67SHeinrich Schuchardt
114fbba2f67SHeinrich Schuchardt return 0;
115fbba2f67SHeinrich Schuchardt }
116*bc19681aSHeinrich Schuchardt UNICODE_TEST(unicode_test_string16);
117fbba2f67SHeinrich Schuchardt #endif
118fbba2f67SHeinrich Schuchardt
unicode_test_utf8_get(struct unit_test_state * uts)119*bc19681aSHeinrich Schuchardt static int unicode_test_utf8_get(struct unit_test_state *uts)
120f11a164bSHeinrich Schuchardt {
121f11a164bSHeinrich Schuchardt const char *s;
122f11a164bSHeinrich Schuchardt s32 code;
123f11a164bSHeinrich Schuchardt int i;
124f11a164bSHeinrich Schuchardt
125f11a164bSHeinrich Schuchardt /* Check characters less than 0x800 */
126f11a164bSHeinrich Schuchardt s = d2;
127f11a164bSHeinrich Schuchardt for (i = 0; i < 8; ++i) {
128f11a164bSHeinrich Schuchardt code = utf8_get((const char **)&s);
129f11a164bSHeinrich Schuchardt /* c2 is the utf-8 encoding of d2 */
130f11a164bSHeinrich Schuchardt ut_asserteq(c2[i], code);
131f11a164bSHeinrich Schuchardt if (!code)
132f11a164bSHeinrich Schuchardt break;
133f11a164bSHeinrich Schuchardt }
134f11a164bSHeinrich Schuchardt ut_asserteq_ptr(s, d2 + 9)
135f11a164bSHeinrich Schuchardt
136f11a164bSHeinrich Schuchardt /* Check characters less than 0x10000 */
137f11a164bSHeinrich Schuchardt s = d3;
138f11a164bSHeinrich Schuchardt for (i = 0; i < 4; ++i) {
139f11a164bSHeinrich Schuchardt code = utf8_get((const char **)&s);
140f11a164bSHeinrich Schuchardt /* c3 is the utf-8 encoding of d3 */
141f11a164bSHeinrich Schuchardt ut_asserteq(c3[i], code);
142f11a164bSHeinrich Schuchardt if (!code)
143f11a164bSHeinrich Schuchardt break;
144f11a164bSHeinrich Schuchardt }
145f11a164bSHeinrich Schuchardt ut_asserteq_ptr(s, d3 + 9)
146f11a164bSHeinrich Schuchardt
147f11a164bSHeinrich Schuchardt /* Check character greater 0xffff */
148f11a164bSHeinrich Schuchardt s = d4;
149f11a164bSHeinrich Schuchardt code = utf8_get((const char **)&s);
150f11a164bSHeinrich Schuchardt ut_asserteq(0x0001048d, code);
151f11a164bSHeinrich Schuchardt ut_asserteq_ptr(s, d4 + 4);
152f11a164bSHeinrich Schuchardt
153f11a164bSHeinrich Schuchardt return 0;
154f11a164bSHeinrich Schuchardt }
155*bc19681aSHeinrich Schuchardt UNICODE_TEST(unicode_test_utf8_get);
156f11a164bSHeinrich Schuchardt
/**
 * unicode_test_utf8_put() - test utf8_put()
 *
 * Encodes code points that need one, two, three and four utf-8 bytes into a
 * zero-initialized buffer and checks both the bytes written and how far the
 * write pointer advanced. Finally checks that the surrogate code 0xd888 is
 * rejected with -1.
 *
 * @uts:	unit test state
 * Return:	0 if all checks pass
 */
static int unicode_test_utf8_put(struct unit_test_state *uts)
{
	char buffer[8] = { 0, };
	char *pos;

	/* Commercial at, translates to one character */
	pos = buffer;
	ut_assert(!utf8_put('@', &pos));
	ut_asserteq(1, pos - buffer);
	ut_asserteq('@', buffer[0]);
	ut_assert(!buffer[1]);

	/* Latin letter G with acute, translates to two characters */
	pos = buffer;
	ut_assert(!utf8_put(0x1f4, &pos));
	ut_asserteq(2, pos - buffer);
	ut_asserteq_str("\xc7\xb4", buffer);

	/* Tagalog letter i, translates to three characters */
	pos = buffer;
	ut_assert(!utf8_put(0x1701, &pos));
	ut_asserteq(3, pos - buffer);
	ut_asserteq_str("\xe1\x9c\x81", buffer);

	/* Hamster face, translates to four characters */
	pos = buffer;
	ut_assert(!utf8_put(0x1f439, &pos));
	ut_asserteq(4, pos - buffer);
	ut_asserteq_str("\xf0\x9f\x90\xb9", buffer);

	/* Illegal code */
	pos = buffer;
	ut_asserteq(-1, utf8_put(0xd888, &pos));

	return 0;
}
UNICODE_TEST(unicode_test_utf8_put);

unicode_test_utf8_utf16_strlen(struct unit_test_state * uts)195*bc19681aSHeinrich Schuchardt static int unicode_test_utf8_utf16_strlen(struct unit_test_state *uts)
196f11a164bSHeinrich Schuchardt {
197f11a164bSHeinrich Schuchardt ut_asserteq(6, utf8_utf16_strlen(d1));
198f11a164bSHeinrich Schuchardt ut_asserteq(8, utf8_utf16_strlen(d2));
199f11a164bSHeinrich Schuchardt ut_asserteq(3, utf8_utf16_strlen(d3));
200f11a164bSHeinrich Schuchardt ut_asserteq(6, utf8_utf16_strlen(d4));
201f11a164bSHeinrich Schuchardt
202f11a164bSHeinrich Schuchardt /* illegal utf-8 sequences */
203f11a164bSHeinrich Schuchardt ut_asserteq(4, utf8_utf16_strlen(j1));
20435cbb796SHeinrich Schuchardt ut_asserteq(4, utf8_utf16_strlen(j2));
205f11a164bSHeinrich Schuchardt ut_asserteq(3, utf8_utf16_strlen(j3));
206f11a164bSHeinrich Schuchardt
207f11a164bSHeinrich Schuchardt return 0;
208f11a164bSHeinrich Schuchardt }
209*bc19681aSHeinrich Schuchardt UNICODE_TEST(unicode_test_utf8_utf16_strlen);
210f11a164bSHeinrich Schuchardt
unicode_test_utf8_utf16_strnlen(struct unit_test_state * uts)211*bc19681aSHeinrich Schuchardt static int unicode_test_utf8_utf16_strnlen(struct unit_test_state *uts)
212f11a164bSHeinrich Schuchardt {
213f11a164bSHeinrich Schuchardt ut_asserteq(3, utf8_utf16_strnlen(d1, 3));
214f11a164bSHeinrich Schuchardt ut_asserteq(6, utf8_utf16_strnlen(d1, 13));
215f11a164bSHeinrich Schuchardt ut_asserteq(6, utf8_utf16_strnlen(d2, 6));
216f11a164bSHeinrich Schuchardt ut_asserteq(2, utf8_utf16_strnlen(d3, 2));
217f11a164bSHeinrich Schuchardt ut_asserteq(4, utf8_utf16_strnlen(d4, 2));
218f11a164bSHeinrich Schuchardt ut_asserteq(6, utf8_utf16_strnlen(d4, 3));
219f11a164bSHeinrich Schuchardt
220f11a164bSHeinrich Schuchardt /* illegal utf-8 sequences */
221f11a164bSHeinrich Schuchardt ut_asserteq(4, utf8_utf16_strnlen(j1, 16));
22235cbb796SHeinrich Schuchardt ut_asserteq(4, utf8_utf16_strnlen(j2, 16));
223f11a164bSHeinrich Schuchardt ut_asserteq(3, utf8_utf16_strnlen(j3, 16));
224f11a164bSHeinrich Schuchardt
225f11a164bSHeinrich Schuchardt return 0;
226f11a164bSHeinrich Schuchardt }
227*bc19681aSHeinrich Schuchardt UNICODE_TEST(unicode_test_utf8_utf16_strnlen);
228f11a164bSHeinrich Schuchardt
229f11a164bSHeinrich Schuchardt /**
230f11a164bSHeinrich Schuchardt * ut_u16_strcmp() - Compare to u16 strings.
231f11a164bSHeinrich Schuchardt *
232f11a164bSHeinrich Schuchardt * @a1: first string
233f11a164bSHeinrich Schuchardt * @a2: second string
234f11a164bSHeinrich Schuchardt * @count: number of u16 to compare
235f11a164bSHeinrich Schuchardt * Return: -1 if a1 < a2, 0 if a1 == a2, 1 if a1 > a2
236f11a164bSHeinrich Schuchardt */
unicode_test_u16_strcmp(const u16 * a1,const u16 * a2,size_t count)237*bc19681aSHeinrich Schuchardt static int unicode_test_u16_strcmp(const u16 *a1, const u16 *a2, size_t count)
238f11a164bSHeinrich Schuchardt {
239f11a164bSHeinrich Schuchardt for (; (*a1 || *a2) && count; ++a1, ++a2, --count) {
240f11a164bSHeinrich Schuchardt if (*a1 < *a2)
241f11a164bSHeinrich Schuchardt return -1;
242f11a164bSHeinrich Schuchardt if (*a1 > *a2)
243f11a164bSHeinrich Schuchardt return 1;
244f11a164bSHeinrich Schuchardt }
245f11a164bSHeinrich Schuchardt return 0;
246f11a164bSHeinrich Schuchardt }
247f11a164bSHeinrich Schuchardt
unicode_test_utf8_utf16_strcpy(struct unit_test_state * uts)248*bc19681aSHeinrich Schuchardt static int unicode_test_utf8_utf16_strcpy(struct unit_test_state *uts)
249f11a164bSHeinrich Schuchardt {
250f11a164bSHeinrich Schuchardt u16 buf[16];
251f11a164bSHeinrich Schuchardt u16 *pos;
252f11a164bSHeinrich Schuchardt
253f11a164bSHeinrich Schuchardt pos = buf;
254f11a164bSHeinrich Schuchardt utf8_utf16_strcpy(&pos, d1);
255f11a164bSHeinrich Schuchardt ut_asserteq(6, pos - buf);
256*bc19681aSHeinrich Schuchardt ut_assert(!unicode_test_u16_strcmp(buf, c1, SIZE_MAX));
257f11a164bSHeinrich Schuchardt
258f11a164bSHeinrich Schuchardt pos = buf;
259f11a164bSHeinrich Schuchardt utf8_utf16_strcpy(&pos, d2);
260f11a164bSHeinrich Schuchardt ut_asserteq(8, pos - buf);
261*bc19681aSHeinrich Schuchardt ut_assert(!unicode_test_u16_strcmp(buf, c2, SIZE_MAX));
262f11a164bSHeinrich Schuchardt
263f11a164bSHeinrich Schuchardt pos = buf;
264f11a164bSHeinrich Schuchardt utf8_utf16_strcpy(&pos, d3);
265f11a164bSHeinrich Schuchardt ut_asserteq(3, pos - buf);
266*bc19681aSHeinrich Schuchardt ut_assert(!unicode_test_u16_strcmp(buf, c3, SIZE_MAX));
267f11a164bSHeinrich Schuchardt
268f11a164bSHeinrich Schuchardt pos = buf;
269f11a164bSHeinrich Schuchardt utf8_utf16_strcpy(&pos, d4);
270f11a164bSHeinrich Schuchardt ut_asserteq(6, pos - buf);
271*bc19681aSHeinrich Schuchardt ut_assert(!unicode_test_u16_strcmp(buf, c4, SIZE_MAX));
272f11a164bSHeinrich Schuchardt
273f11a164bSHeinrich Schuchardt /* Illegal utf-8 strings */
274f11a164bSHeinrich Schuchardt pos = buf;
275f11a164bSHeinrich Schuchardt utf8_utf16_strcpy(&pos, j1);
276f11a164bSHeinrich Schuchardt ut_asserteq(4, pos - buf);
277*bc19681aSHeinrich Schuchardt ut_assert(!unicode_test_u16_strcmp(buf, L"j1?l", SIZE_MAX));
278f11a164bSHeinrich Schuchardt
279f11a164bSHeinrich Schuchardt pos = buf;
280f11a164bSHeinrich Schuchardt utf8_utf16_strcpy(&pos, j2);
28135cbb796SHeinrich Schuchardt ut_asserteq(4, pos - buf);
282*bc19681aSHeinrich Schuchardt ut_assert(!unicode_test_u16_strcmp(buf, L"j2?l", SIZE_MAX));
283f11a164bSHeinrich Schuchardt
284f11a164bSHeinrich Schuchardt pos = buf;
285f11a164bSHeinrich Schuchardt utf8_utf16_strcpy(&pos, j3);
286f11a164bSHeinrich Schuchardt ut_asserteq(3, pos - buf);
287*bc19681aSHeinrich Schuchardt ut_assert(!unicode_test_u16_strcmp(buf, L"j3?", SIZE_MAX));
288f11a164bSHeinrich Schuchardt
289f11a164bSHeinrich Schuchardt return 0;
290f11a164bSHeinrich Schuchardt }
291*bc19681aSHeinrich Schuchardt UNICODE_TEST(unicode_test_utf8_utf16_strcpy);
292f11a164bSHeinrich Schuchardt
unicode_test_utf8_utf16_strncpy(struct unit_test_state * uts)293*bc19681aSHeinrich Schuchardt static int unicode_test_utf8_utf16_strncpy(struct unit_test_state *uts)
294f11a164bSHeinrich Schuchardt {
295f11a164bSHeinrich Schuchardt u16 buf[16];
296f11a164bSHeinrich Schuchardt u16 *pos;
297f11a164bSHeinrich Schuchardt
298f11a164bSHeinrich Schuchardt pos = buf;
299f11a164bSHeinrich Schuchardt memset(buf, 0, sizeof(buf));
300f11a164bSHeinrich Schuchardt utf8_utf16_strncpy(&pos, d1, 4);
301f11a164bSHeinrich Schuchardt ut_asserteq(4, pos - buf);
302f11a164bSHeinrich Schuchardt ut_assert(!buf[4]);
303*bc19681aSHeinrich Schuchardt ut_assert(!unicode_test_u16_strcmp(buf, c1, 4));
304f11a164bSHeinrich Schuchardt
305f11a164bSHeinrich Schuchardt pos = buf;
306f11a164bSHeinrich Schuchardt memset(buf, 0, sizeof(buf));
307f11a164bSHeinrich Schuchardt utf8_utf16_strncpy(&pos, d2, 10);
308f11a164bSHeinrich Schuchardt ut_asserteq(8, pos - buf);
309f11a164bSHeinrich Schuchardt ut_assert(buf[4]);
310*bc19681aSHeinrich Schuchardt ut_assert(!unicode_test_u16_strcmp(buf, c2, SIZE_MAX));
311f11a164bSHeinrich Schuchardt
312f11a164bSHeinrich Schuchardt pos = buf;
313f11a164bSHeinrich Schuchardt memset(buf, 0, sizeof(buf));
314f11a164bSHeinrich Schuchardt utf8_utf16_strncpy(&pos, d3, 2);
315f11a164bSHeinrich Schuchardt ut_asserteq(2, pos - buf);
316f11a164bSHeinrich Schuchardt ut_assert(!buf[2]);
317*bc19681aSHeinrich Schuchardt ut_assert(!unicode_test_u16_strcmp(buf, c3, 2));
318f11a164bSHeinrich Schuchardt
319f11a164bSHeinrich Schuchardt pos = buf;
320f11a164bSHeinrich Schuchardt memset(buf, 0, sizeof(buf));
321f11a164bSHeinrich Schuchardt utf8_utf16_strncpy(&pos, d4, 2);
322f11a164bSHeinrich Schuchardt ut_asserteq(4, pos - buf);
323f11a164bSHeinrich Schuchardt ut_assert(!buf[4]);
324*bc19681aSHeinrich Schuchardt ut_assert(!unicode_test_u16_strcmp(buf, c4, 4));
325f11a164bSHeinrich Schuchardt
326f11a164bSHeinrich Schuchardt pos = buf;
327f11a164bSHeinrich Schuchardt memset(buf, 0, sizeof(buf));
328f11a164bSHeinrich Schuchardt utf8_utf16_strncpy(&pos, d4, 10);
329f11a164bSHeinrich Schuchardt ut_asserteq(6, pos - buf);
330f11a164bSHeinrich Schuchardt ut_assert(buf[5]);
331*bc19681aSHeinrich Schuchardt ut_assert(!unicode_test_u16_strcmp(buf, c4, SIZE_MAX));
332f11a164bSHeinrich Schuchardt
333f11a164bSHeinrich Schuchardt return 0;
334f11a164bSHeinrich Schuchardt }
335*bc19681aSHeinrich Schuchardt UNICODE_TEST(unicode_test_utf8_utf16_strncpy);
336f11a164bSHeinrich Schuchardt
unicode_test_utf16_get(struct unit_test_state * uts)337*bc19681aSHeinrich Schuchardt static int unicode_test_utf16_get(struct unit_test_state *uts)
338f11a164bSHeinrich Schuchardt {
339f11a164bSHeinrich Schuchardt const u16 *s;
340f11a164bSHeinrich Schuchardt s32 code;
341f11a164bSHeinrich Schuchardt int i;
342f11a164bSHeinrich Schuchardt
343f11a164bSHeinrich Schuchardt /* Check characters less than 0x10000 */
344f11a164bSHeinrich Schuchardt s = c2;
345f11a164bSHeinrich Schuchardt for (i = 0; i < 9; ++i) {
346f11a164bSHeinrich Schuchardt code = utf16_get((const u16 **)&s);
347f11a164bSHeinrich Schuchardt ut_asserteq(c2[i], code);
348f11a164bSHeinrich Schuchardt if (!code)
349f11a164bSHeinrich Schuchardt break;
350f11a164bSHeinrich Schuchardt }
351f11a164bSHeinrich Schuchardt ut_asserteq_ptr(c2 + 8, s);
352f11a164bSHeinrich Schuchardt
353f11a164bSHeinrich Schuchardt /* Check character greater 0xffff */
354f11a164bSHeinrich Schuchardt s = c4;
355f11a164bSHeinrich Schuchardt code = utf16_get((const u16 **)&s);
356f11a164bSHeinrich Schuchardt ut_asserteq(0x0001048d, code);
357f11a164bSHeinrich Schuchardt ut_asserteq_ptr(c4 + 2, s);
358f11a164bSHeinrich Schuchardt
359f11a164bSHeinrich Schuchardt return 0;
360f11a164bSHeinrich Schuchardt }
361*bc19681aSHeinrich Schuchardt UNICODE_TEST(unicode_test_utf16_get);
362f11a164bSHeinrich Schuchardt
unicode_test_utf16_put(struct unit_test_state * uts)363*bc19681aSHeinrich Schuchardt static int unicode_test_utf16_put(struct unit_test_state *uts)
364f11a164bSHeinrich Schuchardt {
365f11a164bSHeinrich Schuchardt u16 buffer[4] = { 0, };
366f11a164bSHeinrich Schuchardt u16 *pos;
367f11a164bSHeinrich Schuchardt
368f11a164bSHeinrich Schuchardt /* Commercial at, translates to one word */
369f11a164bSHeinrich Schuchardt pos = buffer;
370f11a164bSHeinrich Schuchardt ut_assert(!utf16_put('@', &pos));
371f11a164bSHeinrich Schuchardt ut_asserteq(1, pos - buffer);
372f11a164bSHeinrich Schuchardt ut_asserteq((u16)'@', buffer[0]);
373f11a164bSHeinrich Schuchardt ut_assert(!buffer[1]);
374f11a164bSHeinrich Schuchardt
375f11a164bSHeinrich Schuchardt /* Hamster face, translates to two words */
376f11a164bSHeinrich Schuchardt pos = buffer;
377f11a164bSHeinrich Schuchardt ut_assert(!utf16_put(0x1f439, &pos));
378f11a164bSHeinrich Schuchardt ut_asserteq(2, pos - buffer);
379f11a164bSHeinrich Schuchardt ut_asserteq((u16)0xd83d, buffer[0]);
380f11a164bSHeinrich Schuchardt ut_asserteq((u16)0xdc39, buffer[1]);
381f11a164bSHeinrich Schuchardt ut_assert(!buffer[2]);
382f11a164bSHeinrich Schuchardt
383f11a164bSHeinrich Schuchardt /* Illegal code */
384f11a164bSHeinrich Schuchardt pos = buffer;
385f11a164bSHeinrich Schuchardt ut_asserteq(-1, utf16_put(0xd888, &pos));
386f11a164bSHeinrich Schuchardt
387f11a164bSHeinrich Schuchardt return 0;
388f11a164bSHeinrich Schuchardt }
389*bc19681aSHeinrich Schuchardt UNICODE_TEST(unicode_test_utf16_put);
390f11a164bSHeinrich Schuchardt
unicode_test_utf16_strnlen(struct unit_test_state * uts)391*bc19681aSHeinrich Schuchardt static int unicode_test_utf16_strnlen(struct unit_test_state *uts)
392f11a164bSHeinrich Schuchardt {
393f11a164bSHeinrich Schuchardt ut_asserteq(3, utf16_strnlen(c1, 3));
394f11a164bSHeinrich Schuchardt ut_asserteq(6, utf16_strnlen(c1, 13));
395f11a164bSHeinrich Schuchardt ut_asserteq(6, utf16_strnlen(c2, 6));
396f11a164bSHeinrich Schuchardt ut_asserteq(2, utf16_strnlen(c3, 2));
397f11a164bSHeinrich Schuchardt ut_asserteq(2, utf16_strnlen(c4, 2));
398f11a164bSHeinrich Schuchardt ut_asserteq(3, utf16_strnlen(c4, 3));
399f11a164bSHeinrich Schuchardt
400f11a164bSHeinrich Schuchardt /* illegal utf-16 word sequences */
401f11a164bSHeinrich Schuchardt ut_asserteq(4, utf16_strnlen(i1, 16));
402f11a164bSHeinrich Schuchardt ut_asserteq(4, utf16_strnlen(i2, 16));
403f11a164bSHeinrich Schuchardt ut_asserteq(3, utf16_strnlen(i3, 16));
404f11a164bSHeinrich Schuchardt
405f11a164bSHeinrich Schuchardt return 0;
406f11a164bSHeinrich Schuchardt }
407*bc19681aSHeinrich Schuchardt UNICODE_TEST(unicode_test_utf16_strnlen);
408f11a164bSHeinrich Schuchardt
unicode_test_utf16_utf8_strlen(struct unit_test_state * uts)409*bc19681aSHeinrich Schuchardt static int unicode_test_utf16_utf8_strlen(struct unit_test_state *uts)
410f11a164bSHeinrich Schuchardt {
411f11a164bSHeinrich Schuchardt ut_asserteq(6, utf16_utf8_strlen(c1));
412f11a164bSHeinrich Schuchardt ut_asserteq(9, utf16_utf8_strlen(c2));
413f11a164bSHeinrich Schuchardt ut_asserteq(9, utf16_utf8_strlen(c3));
414f11a164bSHeinrich Schuchardt ut_asserteq(12, utf16_utf8_strlen(c4));
415f11a164bSHeinrich Schuchardt
416f11a164bSHeinrich Schuchardt /* illegal utf-16 word sequences */
417f11a164bSHeinrich Schuchardt ut_asserteq(4, utf16_utf8_strlen(i1));
418f11a164bSHeinrich Schuchardt ut_asserteq(4, utf16_utf8_strlen(i2));
419f11a164bSHeinrich Schuchardt ut_asserteq(3, utf16_utf8_strlen(i3));
420f11a164bSHeinrich Schuchardt
421f11a164bSHeinrich Schuchardt return 0;
422f11a164bSHeinrich Schuchardt }
423*bc19681aSHeinrich Schuchardt UNICODE_TEST(unicode_test_utf16_utf8_strlen);
424f11a164bSHeinrich Schuchardt
unicode_test_utf16_utf8_strnlen(struct unit_test_state * uts)425*bc19681aSHeinrich Schuchardt static int unicode_test_utf16_utf8_strnlen(struct unit_test_state *uts)
426f11a164bSHeinrich Schuchardt {
427f11a164bSHeinrich Schuchardt ut_asserteq(3, utf16_utf8_strnlen(c1, 3));
428f11a164bSHeinrich Schuchardt ut_asserteq(6, utf16_utf8_strnlen(c1, 13));
429f11a164bSHeinrich Schuchardt ut_asserteq(7, utf16_utf8_strnlen(c2, 6));
430f11a164bSHeinrich Schuchardt ut_asserteq(6, utf16_utf8_strnlen(c3, 2));
431f11a164bSHeinrich Schuchardt ut_asserteq(8, utf16_utf8_strnlen(c4, 2));
432f11a164bSHeinrich Schuchardt ut_asserteq(12, utf16_utf8_strnlen(c4, 3));
433f11a164bSHeinrich Schuchardt return 0;
434f11a164bSHeinrich Schuchardt }
435*bc19681aSHeinrich Schuchardt UNICODE_TEST(unicode_test_utf16_utf8_strnlen);
436f11a164bSHeinrich Schuchardt
unicode_test_utf16_utf8_strcpy(struct unit_test_state * uts)437*bc19681aSHeinrich Schuchardt static int unicode_test_utf16_utf8_strcpy(struct unit_test_state *uts)
438f11a164bSHeinrich Schuchardt {
439f11a164bSHeinrich Schuchardt char buf[16];
440f11a164bSHeinrich Schuchardt char *pos;
441f11a164bSHeinrich Schuchardt
442f11a164bSHeinrich Schuchardt pos = buf;
443f11a164bSHeinrich Schuchardt utf16_utf8_strcpy(&pos, c1);
444f11a164bSHeinrich Schuchardt ut_asserteq(6, pos - buf);
445f11a164bSHeinrich Schuchardt ut_asserteq_str(d1, buf);
446f11a164bSHeinrich Schuchardt
447f11a164bSHeinrich Schuchardt pos = buf;
448f11a164bSHeinrich Schuchardt utf16_utf8_strcpy(&pos, c2);
449f11a164bSHeinrich Schuchardt ut_asserteq(9, pos - buf);
450f11a164bSHeinrich Schuchardt ut_asserteq_str(d2, buf);
451f11a164bSHeinrich Schuchardt
452f11a164bSHeinrich Schuchardt pos = buf;
453f11a164bSHeinrich Schuchardt utf16_utf8_strcpy(&pos, c3);
454f11a164bSHeinrich Schuchardt ut_asserteq(9, pos - buf);
455f11a164bSHeinrich Schuchardt ut_asserteq_str(d3, buf);
456f11a164bSHeinrich Schuchardt
457f11a164bSHeinrich Schuchardt pos = buf;
458f11a164bSHeinrich Schuchardt utf16_utf8_strcpy(&pos, c4);
459f11a164bSHeinrich Schuchardt ut_asserteq(12, pos - buf);
460f11a164bSHeinrich Schuchardt ut_asserteq_str(d4, buf);
461f11a164bSHeinrich Schuchardt
462f11a164bSHeinrich Schuchardt /* Illegal utf-16 strings */
463f11a164bSHeinrich Schuchardt pos = buf;
464f11a164bSHeinrich Schuchardt utf16_utf8_strcpy(&pos, i1);
465f11a164bSHeinrich Schuchardt ut_asserteq(4, pos - buf);
466f11a164bSHeinrich Schuchardt ut_asserteq_str("i1?l", buf);
467f11a164bSHeinrich Schuchardt
468f11a164bSHeinrich Schuchardt pos = buf;
469f11a164bSHeinrich Schuchardt utf16_utf8_strcpy(&pos, i2);
470f11a164bSHeinrich Schuchardt ut_asserteq(4, pos - buf);
471f11a164bSHeinrich Schuchardt ut_asserteq_str("i2?l", buf);
472f11a164bSHeinrich Schuchardt
473f11a164bSHeinrich Schuchardt pos = buf;
474f11a164bSHeinrich Schuchardt utf16_utf8_strcpy(&pos, i3);
475f11a164bSHeinrich Schuchardt ut_asserteq(3, pos - buf);
476f11a164bSHeinrich Schuchardt ut_asserteq_str("i3?", buf);
477f11a164bSHeinrich Schuchardt
478f11a164bSHeinrich Schuchardt return 0;
479f11a164bSHeinrich Schuchardt }
480*bc19681aSHeinrich Schuchardt UNICODE_TEST(unicode_test_utf16_utf8_strcpy);
481f11a164bSHeinrich Schuchardt
unicode_test_utf16_utf8_strncpy(struct unit_test_state * uts)482*bc19681aSHeinrich Schuchardt static int unicode_test_utf16_utf8_strncpy(struct unit_test_state *uts)
483f11a164bSHeinrich Schuchardt {
484f11a164bSHeinrich Schuchardt char buf[16];
485f11a164bSHeinrich Schuchardt char *pos;
486f11a164bSHeinrich Schuchardt
487f11a164bSHeinrich Schuchardt pos = buf;
488f11a164bSHeinrich Schuchardt memset(buf, 0, sizeof(buf));
489f11a164bSHeinrich Schuchardt utf16_utf8_strncpy(&pos, c1, 4);
490f11a164bSHeinrich Schuchardt ut_asserteq(4, pos - buf);
491f11a164bSHeinrich Schuchardt ut_assert(!buf[4]);
492f11a164bSHeinrich Schuchardt ut_assert(!strncmp(buf, d1, 4));
493f11a164bSHeinrich Schuchardt
494f11a164bSHeinrich Schuchardt pos = buf;
495f11a164bSHeinrich Schuchardt memset(buf, 0, sizeof(buf));
496f11a164bSHeinrich Schuchardt utf16_utf8_strncpy(&pos, c2, 10);
497f11a164bSHeinrich Schuchardt ut_asserteq(9, pos - buf);
498f11a164bSHeinrich Schuchardt ut_assert(buf[4]);
499f11a164bSHeinrich Schuchardt ut_assert(!strncmp(buf, d2, SIZE_MAX));
500f11a164bSHeinrich Schuchardt
501f11a164bSHeinrich Schuchardt pos = buf;
502f11a164bSHeinrich Schuchardt memset(buf, 0, sizeof(buf));
503f11a164bSHeinrich Schuchardt utf16_utf8_strncpy(&pos, c3, 2);
504f11a164bSHeinrich Schuchardt ut_asserteq(6, pos - buf);
505f11a164bSHeinrich Schuchardt ut_assert(!buf[6]);
506f11a164bSHeinrich Schuchardt ut_assert(!strncmp(buf, d3, 6));
507f11a164bSHeinrich Schuchardt
508f11a164bSHeinrich Schuchardt pos = buf;
509f11a164bSHeinrich Schuchardt memset(buf, 0, sizeof(buf));
510f11a164bSHeinrich Schuchardt utf16_utf8_strncpy(&pos, c4, 2);
511f11a164bSHeinrich Schuchardt ut_asserteq(8, pos - buf);
512f11a164bSHeinrich Schuchardt ut_assert(!buf[8]);
513f11a164bSHeinrich Schuchardt ut_assert(!strncmp(buf, d4, 8));
514f11a164bSHeinrich Schuchardt
515f11a164bSHeinrich Schuchardt pos = buf;
516f11a164bSHeinrich Schuchardt memset(buf, 0, sizeof(buf));
517f11a164bSHeinrich Schuchardt utf16_utf8_strncpy(&pos, c4, 10);
518f11a164bSHeinrich Schuchardt ut_asserteq(12, pos - buf);
519f11a164bSHeinrich Schuchardt ut_assert(buf[5]);
520f11a164bSHeinrich Schuchardt ut_assert(!strncmp(buf, d4, SIZE_MAX));
521f11a164bSHeinrich Schuchardt
522f11a164bSHeinrich Schuchardt return 0;
523f11a164bSHeinrich Schuchardt }
524*bc19681aSHeinrich Schuchardt UNICODE_TEST(unicode_test_utf16_utf8_strncpy);
525f11a164bSHeinrich Schuchardt
/**
 * unicode_test_utf_to_lower() - test utf_to_lower()
 *
 * Checks the ASCII letters at both ends of the alphabet, their
 * non-letter neighbours '@' and '[' (which must stay unchanged), an
 * already lower case letter and a Latin-1 letter. The Cyrillic check is
 * only built with CONFIG_EFI_UNICODE_CAPITALIZATION.
 *
 * @uts:	unit test state
 * Return:	0 if all checks pass
 */
static int unicode_test_utf_to_lower(struct unit_test_state *uts)
{
	ut_asserteq('@', utf_to_lower('@'));
	ut_asserteq('a', utf_to_lower('A'));
	ut_asserteq('z', utf_to_lower('Z'));
	ut_asserteq('[', utf_to_lower('['));
	ut_asserteq('m', utf_to_lower('m'));
	/* Latin letter O with diaeresis (umlaut) */
	ut_asserteq(0x00f6, utf_to_lower(0x00d6));
#ifdef CONFIG_EFI_UNICODE_CAPITALIZATION
	/* Cyrillic letter I */
	ut_asserteq(0x0438, utf_to_lower(0x0418));
#endif
	return 0;
}
UNICODE_TEST(unicode_test_utf_to_lower);

/**
 * unicode_test_utf_to_upper() - test utf_to_upper()
 *
 * Checks the ASCII letters at both ends of the alphabet, their
 * non-letter neighbours '`' and '{' (which must stay unchanged), an
 * already upper case letter and a Latin-1 letter. The Cyrillic check is
 * only built with CONFIG_EFI_UNICODE_CAPITALIZATION.
 *
 * @uts:	unit test state
 * Return:	0 if all checks pass
 */
static int unicode_test_utf_to_upper(struct unit_test_state *uts)
{
	ut_asserteq('`', utf_to_upper('`'));
	ut_asserteq('A', utf_to_upper('a'));
	ut_asserteq('Z', utf_to_upper('z'));
	ut_asserteq('{', utf_to_upper('{'));
	ut_asserteq('M', utf_to_upper('M'));
	/* Latin letter O with diaeresis (umlaut) */
	ut_asserteq(0x00d6, utf_to_upper(0x00f6));
#ifdef CONFIG_EFI_UNICODE_CAPITALIZATION
	/* Cyrillic letter I */
	ut_asserteq(0x0418, utf_to_upper(0x0438));
#endif
	return 0;
}
UNICODE_TEST(unicode_test_utf_to_upper);

/**
 * do_ut_unicode() - run the Unicode unit tests
 *
 * Looks up the start and the number of entries of the unicode_test linker
 * list, i.e. the tests registered with UNICODE_TEST(), and hands them to
 * cmd_ut_category() together with the command line arguments.
 *
 * @cmdtp:	command table entry (not used here)
 * @flag:	command flags (not used here)
 * @argc:	argument count, forwarded to cmd_ut_category()
 * @argv:	argument vector, forwarded to cmd_ut_category()
 * Return:	value returned by cmd_ut_category()
 */
int do_ut_unicode(cmd_tbl_t *cmdtp, int flag, int argc, char * const argv[])
{
	struct unit_test *tests = ll_entry_start(struct unit_test, unicode_test);
	const int n_ents = ll_entry_count(struct unit_test, unicode_test);

	return cmd_ut_category("Unicode", tests, n_ents, argc, argv);
}