1 // SPDX-License-Identifier: GPL-2.0+ 2 /* 3 * Unit tests for Unicode functions 4 * 5 * Copyright (c) 2018 Heinrich Schuchardt <xypron.glpk@gmx.de> 6 */ 7 8 #include <common.h> 9 #include <charset.h> 10 #include <command.h> 11 #include <errno.h> 12 #include <test/test.h> 13 #include <test/suites.h> 14 #include <test/ut.h> 15 16 /* Linker list entry for a Unicode test */ 17 #define UNICODE_TEST(_name) UNIT_TEST(_name, 0, unicode_test) 18 19 /* Constants c1-c4 and d1-d4 encode the same letters */ 20 21 /* Six characters translating to one utf-8 byte each. */ 22 static const u16 c1[] = {0x55, 0x2d, 0x42, 0x6f, 0x6f, 0x74, 0x00}; 23 /* One character translating to two utf-8 bytes */ 24 static const u16 c2[] = {0x6b, 0x61, 0x66, 0x62, 0xe1, 0x74, 0x75, 0x72, 0x00}; 25 /* Three characters translating to three utf-8 bytes each */ 26 static const u16 c3[] = {0x6f5c, 0x6c34, 0x8266, 0x00}; 27 /* Three letters translating to four utf-8 bytes each */ 28 static const u16 c4[] = {0xd801, 0xdc8d, 0xd801, 0xdc96, 0xd801, 0xdc87, 29 0x0000}; 30 31 /* Illegal utf-16 strings */ 32 static const u16 i1[] = {0x69, 0x31, 0xdc87, 0x6c, 0x00}; 33 static const u16 i2[] = {0x69, 0x32, 0xd801, 0xd801, 0x6c, 0x00}; 34 static const u16 i3[] = {0x69, 0x33, 0xd801, 0x00}; 35 36 /* Six characters translating to one utf-16 word each. */ 37 static const char d1[] = {0x55, 0x2d, 0x42, 0x6f, 0x6f, 0x74, 0x00}; 38 /* Eight characters translating to one utf-16 word each */ 39 static const char d2[] = {0x6b, 0x61, 0x66, 0x62, 0xc3, 0xa1, 0x74, 0x75, 40 0x72, 0x00}; 41 /* Three characters translating to one utf-16 word each */ 42 static const char d3[] = {0xe6, 0xbd, 0x9c, 0xe6, 0xb0, 0xb4, 0xe8, 0x89, 43 0xa6, 0x00}; 44 /* Three letters translating to two utf-16 word each */ 45 static const char d4[] = {0xf0, 0x90, 0x92, 0x8d, 0xf0, 0x90, 0x92, 0x96, 46 0xf0, 0x90, 0x92, 0x87, 0x00}; 47 48 /* Illegal utf-8 strings */ 49 static const char j1[] = {0x6a, 0x31, 0xa1, 0x6c, 0x00}; 50 static const char j2[] = {0x6a, 0x32, 0xc3, 0xc3, 0x6c, 0x00}; 51 static const char j3[] = {0x6a, 0x33, 0xf0, 0x90, 0xf0, 0x00}; 52 53 static int ut_utf8_get(struct unit_test_state *uts) 54 { 55 const char *s; 56 s32 code; 57 int i; 58 59 /* Check characters less than 0x800 */ 60 s = d2; 61 for (i = 0; i < 8; ++i) { 62 code = utf8_get((const char **)&s); 63 /* c2 is the utf-8 encoding of d2 */ 64 ut_asserteq(c2[i], code); 65 if (!code) 66 break; 67 } 68 ut_asserteq_ptr(s, d2 + 9) 69 70 /* Check characters less than 0x10000 */ 71 s = d3; 72 for (i = 0; i < 4; ++i) { 73 code = utf8_get((const char **)&s); 74 /* c3 is the utf-8 encoding of d3 */ 75 ut_asserteq(c3[i], code); 76 if (!code) 77 break; 78 } 79 ut_asserteq_ptr(s, d3 + 9) 80 81 /* Check character greater 0xffff */ 82 s = d4; 83 code = utf8_get((const char **)&s); 84 ut_asserteq(0x0001048d, code); 85 ut_asserteq_ptr(s, d4 + 4); 86 87 return 0; 88 } 89 UNICODE_TEST(ut_utf8_get); 90 91 static int ut_utf8_put(struct unit_test_state *uts) 92 { 93 char buffer[8] = { 0, }; 94 char *pos; 95 96 /* Commercial at, translates to one character */ 97 pos = buffer; 98 ut_assert(!utf8_put('@', &pos)) 99 ut_asserteq(1, pos - buffer); 100 ut_asserteq('@', buffer[0]); 101 ut_assert(!buffer[1]); 102 103 /* Latin letter G with acute, translates to two charactes */ 104 pos = buffer; 105 ut_assert(!utf8_put(0x1f4, &pos)); 106 ut_asserteq(2, pos - buffer); 107 ut_asserteq_str("\xc7\xb4", buffer); 108 109 /* Tagalog letter i, translates to three characters */ 110 pos = buffer; 111 ut_assert(!utf8_put(0x1701, &pos)); 112 ut_asserteq(3, pos - buffer); 113 ut_asserteq_str("\xe1\x9c\x81", buffer); 114 115 /* Hamster face, translates to four characters */ 116 pos = buffer; 117 ut_assert(!utf8_put(0x1f439, &pos)); 118 ut_asserteq(4, pos - buffer); 119 ut_asserteq_str("\xf0\x9f\x90\xb9", buffer); 120 121 /* Illegal code */ 122 pos = buffer; 123 ut_asserteq(-1, utf8_put(0xd888, &pos)); 124 125 return 0; 126 } 127 UNICODE_TEST(ut_utf8_put); 128 129 static int ut_utf8_utf16_strlen(struct unit_test_state *uts) 130 { 131 ut_asserteq(6, utf8_utf16_strlen(d1)); 132 ut_asserteq(8, utf8_utf16_strlen(d2)); 133 ut_asserteq(3, utf8_utf16_strlen(d3)); 134 ut_asserteq(6, utf8_utf16_strlen(d4)); 135 136 /* illegal utf-8 sequences */ 137 ut_asserteq(4, utf8_utf16_strlen(j1)); 138 ut_asserteq(5, utf8_utf16_strlen(j2)); 139 ut_asserteq(3, utf8_utf16_strlen(j3)); 140 141 return 0; 142 } 143 UNICODE_TEST(ut_utf8_utf16_strlen); 144 145 static int ut_utf8_utf16_strnlen(struct unit_test_state *uts) 146 { 147 ut_asserteq(3, utf8_utf16_strnlen(d1, 3)); 148 ut_asserteq(6, utf8_utf16_strnlen(d1, 13)); 149 ut_asserteq(6, utf8_utf16_strnlen(d2, 6)); 150 ut_asserteq(2, utf8_utf16_strnlen(d3, 2)); 151 ut_asserteq(4, utf8_utf16_strnlen(d4, 2)); 152 ut_asserteq(6, utf8_utf16_strnlen(d4, 3)); 153 154 /* illegal utf-8 sequences */ 155 ut_asserteq(4, utf8_utf16_strnlen(j1, 16)); 156 ut_asserteq(5, utf8_utf16_strnlen(j2, 16)); 157 ut_asserteq(3, utf8_utf16_strnlen(j3, 16)); 158 159 return 0; 160 } 161 UNICODE_TEST(ut_utf8_utf16_strnlen); 162 163 /** 164 * ut_u16_strcmp() - Compare to u16 strings. 165 * 166 * @a1: first string 167 * @a2: second string 168 * @count: number of u16 to compare 169 * Return: -1 if a1 < a2, 0 if a1 == a2, 1 if a1 > a2 170 */ 171 static int ut_u16_strcmp(const u16 *a1, const u16 *a2, size_t count) 172 { 173 for (; (*a1 || *a2) && count; ++a1, ++a2, --count) { 174 if (*a1 < *a2) 175 return -1; 176 if (*a1 > *a2) 177 return 1; 178 } 179 return 0; 180 } 181 182 static int ut_utf8_utf16_strcpy(struct unit_test_state *uts) 183 { 184 u16 buf[16]; 185 u16 *pos; 186 187 pos = buf; 188 utf8_utf16_strcpy(&pos, d1); 189 ut_asserteq(6, pos - buf); 190 ut_assert(!ut_u16_strcmp(buf, c1, SIZE_MAX)); 191 192 pos = buf; 193 utf8_utf16_strcpy(&pos, d2); 194 ut_asserteq(8, pos - buf); 195 ut_assert(!ut_u16_strcmp(buf, c2, SIZE_MAX)); 196 197 pos = buf; 198 utf8_utf16_strcpy(&pos, d3); 199 ut_asserteq(3, pos - buf); 200 ut_assert(!ut_u16_strcmp(buf, c3, SIZE_MAX)); 201 202 pos = buf; 203 utf8_utf16_strcpy(&pos, d4); 204 ut_asserteq(6, pos - buf); 205 ut_assert(!ut_u16_strcmp(buf, c4, SIZE_MAX)); 206 207 /* Illegal utf-8 strings */ 208 pos = buf; 209 utf8_utf16_strcpy(&pos, j1); 210 ut_asserteq(4, pos - buf); 211 ut_assert(!ut_u16_strcmp(buf, L"j1?l", SIZE_MAX)); 212 213 pos = buf; 214 utf8_utf16_strcpy(&pos, j2); 215 ut_asserteq(5, pos - buf); 216 ut_assert(!ut_u16_strcmp(buf, L"j2??l", SIZE_MAX)); 217 218 pos = buf; 219 utf8_utf16_strcpy(&pos, j3); 220 ut_asserteq(3, pos - buf); 221 ut_assert(!ut_u16_strcmp(buf, L"j3?", SIZE_MAX)); 222 223 return 0; 224 } 225 UNICODE_TEST(ut_utf8_utf16_strcpy); 226 227 int ut_utf8_utf16_strncpy(struct unit_test_state *uts) 228 { 229 u16 buf[16]; 230 u16 *pos; 231 232 pos = buf; 233 memset(buf, 0, sizeof(buf)); 234 utf8_utf16_strncpy(&pos, d1, 4); 235 ut_asserteq(4, pos - buf); 236 ut_assert(!buf[4]); 237 ut_assert(!ut_u16_strcmp(buf, c1, 4)); 238 239 pos = buf; 240 memset(buf, 0, sizeof(buf)); 241 utf8_utf16_strncpy(&pos, d2, 10); 242 ut_asserteq(8, pos - buf); 243 ut_assert(buf[4]); 244 ut_assert(!ut_u16_strcmp(buf, c2, SIZE_MAX)); 245 246 pos = buf; 247 memset(buf, 0, sizeof(buf)); 248 utf8_utf16_strncpy(&pos, d3, 2); 249 ut_asserteq(2, pos - buf); 250 ut_assert(!buf[2]); 251 ut_assert(!ut_u16_strcmp(buf, c3, 2)); 252 253 pos = buf; 254 memset(buf, 0, sizeof(buf)); 255 utf8_utf16_strncpy(&pos, d4, 2); 256 ut_asserteq(4, pos - buf); 257 ut_assert(!buf[4]); 258 ut_assert(!ut_u16_strcmp(buf, c4, 4)); 259 260 pos = buf; 261 memset(buf, 0, sizeof(buf)); 262 utf8_utf16_strncpy(&pos, d4, 10); 263 ut_asserteq(6, pos - buf); 264 ut_assert(buf[5]); 265 ut_assert(!ut_u16_strcmp(buf, c4, SIZE_MAX)); 266 267 return 0; 268 } 269 UNICODE_TEST(ut_utf8_utf16_strncpy); 270 271 static int ut_utf16_get(struct unit_test_state *uts) 272 { 273 const u16 *s; 274 s32 code; 275 int i; 276 277 /* Check characters less than 0x10000 */ 278 s = c2; 279 for (i = 0; i < 9; ++i) { 280 code = utf16_get((const u16 **)&s); 281 ut_asserteq(c2[i], code); 282 if (!code) 283 break; 284 } 285 ut_asserteq_ptr(c2 + 8, s); 286 287 /* Check character greater 0xffff */ 288 s = c4; 289 code = utf16_get((const u16 **)&s); 290 ut_asserteq(0x0001048d, code); 291 ut_asserteq_ptr(c4 + 2, s); 292 293 return 0; 294 } 295 UNICODE_TEST(ut_utf16_get); 296 297 static int ut_utf16_put(struct unit_test_state *uts) 298 { 299 u16 buffer[4] = { 0, }; 300 u16 *pos; 301 302 /* Commercial at, translates to one word */ 303 pos = buffer; 304 ut_assert(!utf16_put('@', &pos)); 305 ut_asserteq(1, pos - buffer); 306 ut_asserteq((u16)'@', buffer[0]); 307 ut_assert(!buffer[1]); 308 309 /* Hamster face, translates to two words */ 310 pos = buffer; 311 ut_assert(!utf16_put(0x1f439, &pos)); 312 ut_asserteq(2, pos - buffer); 313 ut_asserteq((u16)0xd83d, buffer[0]); 314 ut_asserteq((u16)0xdc39, buffer[1]); 315 ut_assert(!buffer[2]); 316 317 /* Illegal code */ 318 pos = buffer; 319 ut_asserteq(-1, utf16_put(0xd888, &pos)); 320 321 return 0; 322 } 323 UNICODE_TEST(ut_utf16_put); 324 325 int ut_utf16_strnlen(struct unit_test_state *uts) 326 { 327 ut_asserteq(3, utf16_strnlen(c1, 3)); 328 ut_asserteq(6, utf16_strnlen(c1, 13)); 329 ut_asserteq(6, utf16_strnlen(c2, 6)); 330 ut_asserteq(2, utf16_strnlen(c3, 2)); 331 ut_asserteq(2, utf16_strnlen(c4, 2)); 332 ut_asserteq(3, utf16_strnlen(c4, 3)); 333 334 /* illegal utf-16 word sequences */ 335 ut_asserteq(4, utf16_strnlen(i1, 16)); 336 ut_asserteq(4, utf16_strnlen(i2, 16)); 337 ut_asserteq(3, utf16_strnlen(i3, 16)); 338 339 return 0; 340 } 341 UNICODE_TEST(ut_utf16_strnlen); 342 343 int ut_utf16_utf8_strlen(struct unit_test_state *uts) 344 { 345 ut_asserteq(6, utf16_utf8_strlen(c1)); 346 ut_asserteq(9, utf16_utf8_strlen(c2)); 347 ut_asserteq(9, utf16_utf8_strlen(c3)); 348 ut_asserteq(12, utf16_utf8_strlen(c4)); 349 350 /* illegal utf-16 word sequences */ 351 ut_asserteq(4, utf16_utf8_strlen(i1)); 352 ut_asserteq(4, utf16_utf8_strlen(i2)); 353 ut_asserteq(3, utf16_utf8_strlen(i3)); 354 355 return 0; 356 } 357 UNICODE_TEST(ut_utf16_utf8_strlen); 358 359 int ut_utf16_utf8_strnlen(struct unit_test_state *uts) 360 { 361 ut_asserteq(3, utf16_utf8_strnlen(c1, 3)); 362 ut_asserteq(6, utf16_utf8_strnlen(c1, 13)); 363 ut_asserteq(7, utf16_utf8_strnlen(c2, 6)); 364 ut_asserteq(6, utf16_utf8_strnlen(c3, 2)); 365 ut_asserteq(8, utf16_utf8_strnlen(c4, 2)); 366 ut_asserteq(12, utf16_utf8_strnlen(c4, 3)); 367 return 0; 368 } 369 UNICODE_TEST(ut_utf16_utf8_strnlen); 370 371 int ut_utf16_utf8_strcpy(struct unit_test_state *uts) 372 { 373 char buf[16]; 374 char *pos; 375 376 pos = buf; 377 utf16_utf8_strcpy(&pos, c1); 378 ut_asserteq(6, pos - buf); 379 ut_asserteq_str(d1, buf); 380 381 pos = buf; 382 utf16_utf8_strcpy(&pos, c2); 383 ut_asserteq(9, pos - buf); 384 ut_asserteq_str(d2, buf); 385 386 pos = buf; 387 utf16_utf8_strcpy(&pos, c3); 388 ut_asserteq(9, pos - buf); 389 ut_asserteq_str(d3, buf); 390 391 pos = buf; 392 utf16_utf8_strcpy(&pos, c4); 393 ut_asserteq(12, pos - buf); 394 ut_asserteq_str(d4, buf); 395 396 /* Illegal utf-16 strings */ 397 pos = buf; 398 utf16_utf8_strcpy(&pos, i1); 399 ut_asserteq(4, pos - buf); 400 ut_asserteq_str("i1?l", buf); 401 402 pos = buf; 403 utf16_utf8_strcpy(&pos, i2); 404 ut_asserteq(4, pos - buf); 405 ut_asserteq_str("i2?l", buf); 406 407 pos = buf; 408 utf16_utf8_strcpy(&pos, i3); 409 ut_asserteq(3, pos - buf); 410 ut_asserteq_str("i3?", buf); 411 412 return 0; 413 } 414 UNICODE_TEST(ut_utf16_utf8_strcpy); 415 416 int ut_utf16_utf8_strncpy(struct unit_test_state *uts) 417 { 418 char buf[16]; 419 char *pos; 420 421 pos = buf; 422 memset(buf, 0, sizeof(buf)); 423 utf16_utf8_strncpy(&pos, c1, 4); 424 ut_asserteq(4, pos - buf); 425 ut_assert(!buf[4]); 426 ut_assert(!strncmp(buf, d1, 4)); 427 428 pos = buf; 429 memset(buf, 0, sizeof(buf)); 430 utf16_utf8_strncpy(&pos, c2, 10); 431 ut_asserteq(9, pos - buf); 432 ut_assert(buf[4]); 433 ut_assert(!strncmp(buf, d2, SIZE_MAX)); 434 435 pos = buf; 436 memset(buf, 0, sizeof(buf)); 437 utf16_utf8_strncpy(&pos, c3, 2); 438 ut_asserteq(6, pos - buf); 439 ut_assert(!buf[6]); 440 ut_assert(!strncmp(buf, d3, 6)); 441 442 pos = buf; 443 memset(buf, 0, sizeof(buf)); 444 utf16_utf8_strncpy(&pos, c4, 2); 445 ut_asserteq(8, pos - buf); 446 ut_assert(!buf[8]); 447 ut_assert(!strncmp(buf, d4, 8)); 448 449 pos = buf; 450 memset(buf, 0, sizeof(buf)); 451 utf16_utf8_strncpy(&pos, c4, 10); 452 ut_asserteq(12, pos - buf); 453 ut_assert(buf[5]); 454 ut_assert(!strncmp(buf, d4, SIZE_MAX)); 455 456 return 0; 457 } 458 UNICODE_TEST(ut_utf16_utf8_strncpy); 459 460 int do_ut_unicode(cmd_tbl_t *cmdtp, int flag, int argc, char * const argv[]) 461 { 462 struct unit_test *tests = ll_entry_start(struct unit_test, unicode_test); 463 const int n_ents = ll_entry_count(struct unit_test, unicode_test); 464 465 return cmd_ut_category("Unicode", tests, n_ents, argc, argv); 466 } 467