1 // SPDX-License-Identifier: GPL-2.0+ 2 /* 3 * Unit tests for Unicode functions 4 * 5 * Copyright (c) 2018 Heinrich Schuchardt <xypron.glpk@gmx.de> 6 */ 7 8 #include <common.h> 9 #include <charset.h> 10 #include <command.h> 11 #include <errno.h> 12 #include <test/test.h> 13 #include <test/suites.h> 14 #include <test/ut.h> 15 16 /* Linker list entry for a Unicode test */ 17 #define UNICODE_TEST(_name) UNIT_TEST(_name, 0, unicode_test) 18 19 /* Constants c1-c4 and d1-d4 encode the same letters */ 20 21 /* Six characters translating to one utf-8 byte each. */ 22 static const u16 c1[] = {0x55, 0x2d, 0x42, 0x6f, 0x6f, 0x74, 0x00}; 23 /* One character translating to two utf-8 bytes */ 24 static const u16 c2[] = {0x6b, 0x61, 0x66, 0x62, 0xe1, 0x74, 0x75, 0x72, 0x00}; 25 /* Three characters translating to three utf-8 bytes each */ 26 static const u16 c3[] = {0x6f5c, 0x6c34, 0x8266, 0x00}; 27 /* Three letters translating to four utf-8 bytes each */ 28 static const u16 c4[] = {0xd801, 0xdc8d, 0xd801, 0xdc96, 0xd801, 0xdc87, 29 0x0000}; 30 31 /* Illegal utf-16 strings */ 32 static const u16 i1[] = {0x69, 0x31, 0xdc87, 0x6c, 0x00}; 33 static const u16 i2[] = {0x69, 0x32, 0xd801, 0xd801, 0x6c, 0x00}; 34 static const u16 i3[] = {0x69, 0x33, 0xd801, 0x00}; 35 36 /* Six characters translating to one utf-16 word each. */ 37 static const char d1[] = {0x55, 0x2d, 0x42, 0x6f, 0x6f, 0x74, 0x00}; 38 /* Eight characters translating to one utf-16 word each */ 39 static const char d2[] = {0x6b, 0x61, 0x66, 0x62, 0xc3, 0xa1, 0x74, 0x75, 40 0x72, 0x00}; 41 /* Three characters translating to one utf-16 word each */ 42 static const char d3[] = {0xe6, 0xbd, 0x9c, 0xe6, 0xb0, 0xb4, 0xe8, 0x89, 43 0xa6, 0x00}; 44 /* Three letters translating to two utf-16 word each */ 45 static const char d4[] = {0xf0, 0x90, 0x92, 0x8d, 0xf0, 0x90, 0x92, 0x96, 46 0xf0, 0x90, 0x92, 0x87, 0x00}; 47 48 /* Illegal utf-8 strings */ 49 static const char j1[] = {0x6a, 0x31, 0xa1, 0x6c, 0x00}; 50 static const char j2[] = {0x6a, 0x32, 0xc3, 0xc3, 0x6c, 0x00}; 51 static const char j3[] = {0x6a, 0x33, 0xf0, 0x90, 0xf0, 0x00}; 52 53 /* U-Boot uses UTF-16 strings in the EFI context only. */ 54 #if CONFIG_IS_ENABLED(EFI_LOADER) && !defined(API_BUILD) 55 static int ut_string16(struct unit_test_state *uts) 56 { 57 char buf[20]; 58 59 /* Test length and precision */ 60 memset(buf, 0xff, sizeof(buf)); 61 sprintf(buf, "%8.6ls", c2); 62 ut_asserteq(' ', buf[1]); 63 ut_assert(!strncmp(&buf[2], d2, 7)); 64 ut_assert(!buf[9]); 65 66 memset(buf, 0xff, sizeof(buf)); 67 sprintf(buf, "%8.6ls", c4); 68 ut_asserteq(' ', buf[4]); 69 ut_assert(!strncmp(&buf[5], d4, 12)); 70 ut_assert(!buf[17]); 71 72 memset(buf, 0xff, sizeof(buf)); 73 sprintf(buf, "%-8.2ls", c4); 74 ut_asserteq(' ', buf[8]); 75 ut_assert(!strncmp(buf, d4, 8)); 76 ut_assert(!buf[14]); 77 78 /* Test handling of illegal utf-16 sequences */ 79 memset(buf, 0xff, sizeof(buf)); 80 sprintf(buf, "%ls", i1); 81 ut_asserteq_str("i1?l", buf); 82 83 memset(buf, 0xff, sizeof(buf)); 84 sprintf(buf, "%ls", i2); 85 ut_asserteq_str("i2?l", buf); 86 87 memset(buf, 0xff, sizeof(buf)); 88 sprintf(buf, "%ls", i3); 89 ut_asserteq_str("i3?", buf); 90 91 return 0; 92 } 93 UNICODE_TEST(ut_string16); 94 #endif 95 96 static int ut_utf8_get(struct unit_test_state *uts) 97 { 98 const char *s; 99 s32 code; 100 int i; 101 102 /* Check characters less than 0x800 */ 103 s = d2; 104 for (i = 0; i < 8; ++i) { 105 code = utf8_get((const char **)&s); 106 /* c2 is the utf-8 encoding of d2 */ 107 ut_asserteq(c2[i], code); 108 if (!code) 109 break; 110 } 111 ut_asserteq_ptr(s, d2 + 9) 112 113 /* Check characters less than 0x10000 */ 114 s = d3; 115 for (i = 0; i < 4; ++i) { 116 code = utf8_get((const char **)&s); 117 /* c3 is the utf-8 encoding of d3 */ 118 ut_asserteq(c3[i], code); 119 if (!code) 120 break; 121 } 122 ut_asserteq_ptr(s, d3 + 9) 123 124 /* Check character greater 0xffff */ 125 s = d4; 126 code = utf8_get((const char **)&s); 127 ut_asserteq(0x0001048d, code); 128 ut_asserteq_ptr(s, d4 + 4); 129 130 return 0; 131 } 132 UNICODE_TEST(ut_utf8_get); 133 134 static int ut_utf8_put(struct unit_test_state *uts) 135 { 136 char buffer[8] = { 0, }; 137 char *pos; 138 139 /* Commercial at, translates to one character */ 140 pos = buffer; 141 ut_assert(!utf8_put('@', &pos)) 142 ut_asserteq(1, pos - buffer); 143 ut_asserteq('@', buffer[0]); 144 ut_assert(!buffer[1]); 145 146 /* Latin letter G with acute, translates to two charactes */ 147 pos = buffer; 148 ut_assert(!utf8_put(0x1f4, &pos)); 149 ut_asserteq(2, pos - buffer); 150 ut_asserteq_str("\xc7\xb4", buffer); 151 152 /* Tagalog letter i, translates to three characters */ 153 pos = buffer; 154 ut_assert(!utf8_put(0x1701, &pos)); 155 ut_asserteq(3, pos - buffer); 156 ut_asserteq_str("\xe1\x9c\x81", buffer); 157 158 /* Hamster face, translates to four characters */ 159 pos = buffer; 160 ut_assert(!utf8_put(0x1f439, &pos)); 161 ut_asserteq(4, pos - buffer); 162 ut_asserteq_str("\xf0\x9f\x90\xb9", buffer); 163 164 /* Illegal code */ 165 pos = buffer; 166 ut_asserteq(-1, utf8_put(0xd888, &pos)); 167 168 return 0; 169 } 170 UNICODE_TEST(ut_utf8_put); 171 172 static int ut_utf8_utf16_strlen(struct unit_test_state *uts) 173 { 174 ut_asserteq(6, utf8_utf16_strlen(d1)); 175 ut_asserteq(8, utf8_utf16_strlen(d2)); 176 ut_asserteq(3, utf8_utf16_strlen(d3)); 177 ut_asserteq(6, utf8_utf16_strlen(d4)); 178 179 /* illegal utf-8 sequences */ 180 ut_asserteq(4, utf8_utf16_strlen(j1)); 181 ut_asserteq(4, utf8_utf16_strlen(j2)); 182 ut_asserteq(3, utf8_utf16_strlen(j3)); 183 184 return 0; 185 } 186 UNICODE_TEST(ut_utf8_utf16_strlen); 187 188 static int ut_utf8_utf16_strnlen(struct unit_test_state *uts) 189 { 190 ut_asserteq(3, utf8_utf16_strnlen(d1, 3)); 191 ut_asserteq(6, utf8_utf16_strnlen(d1, 13)); 192 ut_asserteq(6, utf8_utf16_strnlen(d2, 6)); 193 ut_asserteq(2, utf8_utf16_strnlen(d3, 2)); 194 ut_asserteq(4, utf8_utf16_strnlen(d4, 2)); 195 ut_asserteq(6, utf8_utf16_strnlen(d4, 3)); 196 197 /* illegal utf-8 sequences */ 198 ut_asserteq(4, utf8_utf16_strnlen(j1, 16)); 199 ut_asserteq(4, utf8_utf16_strnlen(j2, 16)); 200 ut_asserteq(3, utf8_utf16_strnlen(j3, 16)); 201 202 return 0; 203 } 204 UNICODE_TEST(ut_utf8_utf16_strnlen); 205 206 /** 207 * ut_u16_strcmp() - Compare to u16 strings. 208 * 209 * @a1: first string 210 * @a2: second string 211 * @count: number of u16 to compare 212 * Return: -1 if a1 < a2, 0 if a1 == a2, 1 if a1 > a2 213 */ 214 static int ut_u16_strcmp(const u16 *a1, const u16 *a2, size_t count) 215 { 216 for (; (*a1 || *a2) && count; ++a1, ++a2, --count) { 217 if (*a1 < *a2) 218 return -1; 219 if (*a1 > *a2) 220 return 1; 221 } 222 return 0; 223 } 224 225 static int ut_utf8_utf16_strcpy(struct unit_test_state *uts) 226 { 227 u16 buf[16]; 228 u16 *pos; 229 230 pos = buf; 231 utf8_utf16_strcpy(&pos, d1); 232 ut_asserteq(6, pos - buf); 233 ut_assert(!ut_u16_strcmp(buf, c1, SIZE_MAX)); 234 235 pos = buf; 236 utf8_utf16_strcpy(&pos, d2); 237 ut_asserteq(8, pos - buf); 238 ut_assert(!ut_u16_strcmp(buf, c2, SIZE_MAX)); 239 240 pos = buf; 241 utf8_utf16_strcpy(&pos, d3); 242 ut_asserteq(3, pos - buf); 243 ut_assert(!ut_u16_strcmp(buf, c3, SIZE_MAX)); 244 245 pos = buf; 246 utf8_utf16_strcpy(&pos, d4); 247 ut_asserteq(6, pos - buf); 248 ut_assert(!ut_u16_strcmp(buf, c4, SIZE_MAX)); 249 250 /* Illegal utf-8 strings */ 251 pos = buf; 252 utf8_utf16_strcpy(&pos, j1); 253 ut_asserteq(4, pos - buf); 254 ut_assert(!ut_u16_strcmp(buf, L"j1?l", SIZE_MAX)); 255 256 pos = buf; 257 utf8_utf16_strcpy(&pos, j2); 258 ut_asserteq(4, pos - buf); 259 ut_assert(!ut_u16_strcmp(buf, L"j2?l", SIZE_MAX)); 260 261 pos = buf; 262 utf8_utf16_strcpy(&pos, j3); 263 ut_asserteq(3, pos - buf); 264 ut_assert(!ut_u16_strcmp(buf, L"j3?", SIZE_MAX)); 265 266 return 0; 267 } 268 UNICODE_TEST(ut_utf8_utf16_strcpy); 269 270 int ut_utf8_utf16_strncpy(struct unit_test_state *uts) 271 { 272 u16 buf[16]; 273 u16 *pos; 274 275 pos = buf; 276 memset(buf, 0, sizeof(buf)); 277 utf8_utf16_strncpy(&pos, d1, 4); 278 ut_asserteq(4, pos - buf); 279 ut_assert(!buf[4]); 280 ut_assert(!ut_u16_strcmp(buf, c1, 4)); 281 282 pos = buf; 283 memset(buf, 0, sizeof(buf)); 284 utf8_utf16_strncpy(&pos, d2, 10); 285 ut_asserteq(8, pos - buf); 286 ut_assert(buf[4]); 287 ut_assert(!ut_u16_strcmp(buf, c2, SIZE_MAX)); 288 289 pos = buf; 290 memset(buf, 0, sizeof(buf)); 291 utf8_utf16_strncpy(&pos, d3, 2); 292 ut_asserteq(2, pos - buf); 293 ut_assert(!buf[2]); 294 ut_assert(!ut_u16_strcmp(buf, c3, 2)); 295 296 pos = buf; 297 memset(buf, 0, sizeof(buf)); 298 utf8_utf16_strncpy(&pos, d4, 2); 299 ut_asserteq(4, pos - buf); 300 ut_assert(!buf[4]); 301 ut_assert(!ut_u16_strcmp(buf, c4, 4)); 302 303 pos = buf; 304 memset(buf, 0, sizeof(buf)); 305 utf8_utf16_strncpy(&pos, d4, 10); 306 ut_asserteq(6, pos - buf); 307 ut_assert(buf[5]); 308 ut_assert(!ut_u16_strcmp(buf, c4, SIZE_MAX)); 309 310 return 0; 311 } 312 UNICODE_TEST(ut_utf8_utf16_strncpy); 313 314 static int ut_utf16_get(struct unit_test_state *uts) 315 { 316 const u16 *s; 317 s32 code; 318 int i; 319 320 /* Check characters less than 0x10000 */ 321 s = c2; 322 for (i = 0; i < 9; ++i) { 323 code = utf16_get((const u16 **)&s); 324 ut_asserteq(c2[i], code); 325 if (!code) 326 break; 327 } 328 ut_asserteq_ptr(c2 + 8, s); 329 330 /* Check character greater 0xffff */ 331 s = c4; 332 code = utf16_get((const u16 **)&s); 333 ut_asserteq(0x0001048d, code); 334 ut_asserteq_ptr(c4 + 2, s); 335 336 return 0; 337 } 338 UNICODE_TEST(ut_utf16_get); 339 340 static int ut_utf16_put(struct unit_test_state *uts) 341 { 342 u16 buffer[4] = { 0, }; 343 u16 *pos; 344 345 /* Commercial at, translates to one word */ 346 pos = buffer; 347 ut_assert(!utf16_put('@', &pos)); 348 ut_asserteq(1, pos - buffer); 349 ut_asserteq((u16)'@', buffer[0]); 350 ut_assert(!buffer[1]); 351 352 /* Hamster face, translates to two words */ 353 pos = buffer; 354 ut_assert(!utf16_put(0x1f439, &pos)); 355 ut_asserteq(2, pos - buffer); 356 ut_asserteq((u16)0xd83d, buffer[0]); 357 ut_asserteq((u16)0xdc39, buffer[1]); 358 ut_assert(!buffer[2]); 359 360 /* Illegal code */ 361 pos = buffer; 362 ut_asserteq(-1, utf16_put(0xd888, &pos)); 363 364 return 0; 365 } 366 UNICODE_TEST(ut_utf16_put); 367 368 int ut_utf16_strnlen(struct unit_test_state *uts) 369 { 370 ut_asserteq(3, utf16_strnlen(c1, 3)); 371 ut_asserteq(6, utf16_strnlen(c1, 13)); 372 ut_asserteq(6, utf16_strnlen(c2, 6)); 373 ut_asserteq(2, utf16_strnlen(c3, 2)); 374 ut_asserteq(2, utf16_strnlen(c4, 2)); 375 ut_asserteq(3, utf16_strnlen(c4, 3)); 376 377 /* illegal utf-16 word sequences */ 378 ut_asserteq(4, utf16_strnlen(i1, 16)); 379 ut_asserteq(4, utf16_strnlen(i2, 16)); 380 ut_asserteq(3, utf16_strnlen(i3, 16)); 381 382 return 0; 383 } 384 UNICODE_TEST(ut_utf16_strnlen); 385 386 int ut_utf16_utf8_strlen(struct unit_test_state *uts) 387 { 388 ut_asserteq(6, utf16_utf8_strlen(c1)); 389 ut_asserteq(9, utf16_utf8_strlen(c2)); 390 ut_asserteq(9, utf16_utf8_strlen(c3)); 391 ut_asserteq(12, utf16_utf8_strlen(c4)); 392 393 /* illegal utf-16 word sequences */ 394 ut_asserteq(4, utf16_utf8_strlen(i1)); 395 ut_asserteq(4, utf16_utf8_strlen(i2)); 396 ut_asserteq(3, utf16_utf8_strlen(i3)); 397 398 return 0; 399 } 400 UNICODE_TEST(ut_utf16_utf8_strlen); 401 402 int ut_utf16_utf8_strnlen(struct unit_test_state *uts) 403 { 404 ut_asserteq(3, utf16_utf8_strnlen(c1, 3)); 405 ut_asserteq(6, utf16_utf8_strnlen(c1, 13)); 406 ut_asserteq(7, utf16_utf8_strnlen(c2, 6)); 407 ut_asserteq(6, utf16_utf8_strnlen(c3, 2)); 408 ut_asserteq(8, utf16_utf8_strnlen(c4, 2)); 409 ut_asserteq(12, utf16_utf8_strnlen(c4, 3)); 410 return 0; 411 } 412 UNICODE_TEST(ut_utf16_utf8_strnlen); 413 414 int ut_utf16_utf8_strcpy(struct unit_test_state *uts) 415 { 416 char buf[16]; 417 char *pos; 418 419 pos = buf; 420 utf16_utf8_strcpy(&pos, c1); 421 ut_asserteq(6, pos - buf); 422 ut_asserteq_str(d1, buf); 423 424 pos = buf; 425 utf16_utf8_strcpy(&pos, c2); 426 ut_asserteq(9, pos - buf); 427 ut_asserteq_str(d2, buf); 428 429 pos = buf; 430 utf16_utf8_strcpy(&pos, c3); 431 ut_asserteq(9, pos - buf); 432 ut_asserteq_str(d3, buf); 433 434 pos = buf; 435 utf16_utf8_strcpy(&pos, c4); 436 ut_asserteq(12, pos - buf); 437 ut_asserteq_str(d4, buf); 438 439 /* Illegal utf-16 strings */ 440 pos = buf; 441 utf16_utf8_strcpy(&pos, i1); 442 ut_asserteq(4, pos - buf); 443 ut_asserteq_str("i1?l", buf); 444 445 pos = buf; 446 utf16_utf8_strcpy(&pos, i2); 447 ut_asserteq(4, pos - buf); 448 ut_asserteq_str("i2?l", buf); 449 450 pos = buf; 451 utf16_utf8_strcpy(&pos, i3); 452 ut_asserteq(3, pos - buf); 453 ut_asserteq_str("i3?", buf); 454 455 return 0; 456 } 457 UNICODE_TEST(ut_utf16_utf8_strcpy); 458 459 int ut_utf16_utf8_strncpy(struct unit_test_state *uts) 460 { 461 char buf[16]; 462 char *pos; 463 464 pos = buf; 465 memset(buf, 0, sizeof(buf)); 466 utf16_utf8_strncpy(&pos, c1, 4); 467 ut_asserteq(4, pos - buf); 468 ut_assert(!buf[4]); 469 ut_assert(!strncmp(buf, d1, 4)); 470 471 pos = buf; 472 memset(buf, 0, sizeof(buf)); 473 utf16_utf8_strncpy(&pos, c2, 10); 474 ut_asserteq(9, pos - buf); 475 ut_assert(buf[4]); 476 ut_assert(!strncmp(buf, d2, SIZE_MAX)); 477 478 pos = buf; 479 memset(buf, 0, sizeof(buf)); 480 utf16_utf8_strncpy(&pos, c3, 2); 481 ut_asserteq(6, pos - buf); 482 ut_assert(!buf[6]); 483 ut_assert(!strncmp(buf, d3, 6)); 484 485 pos = buf; 486 memset(buf, 0, sizeof(buf)); 487 utf16_utf8_strncpy(&pos, c4, 2); 488 ut_asserteq(8, pos - buf); 489 ut_assert(!buf[8]); 490 ut_assert(!strncmp(buf, d4, 8)); 491 492 pos = buf; 493 memset(buf, 0, sizeof(buf)); 494 utf16_utf8_strncpy(&pos, c4, 10); 495 ut_asserteq(12, pos - buf); 496 ut_assert(buf[5]); 497 ut_assert(!strncmp(buf, d4, SIZE_MAX)); 498 499 return 0; 500 } 501 UNICODE_TEST(ut_utf16_utf8_strncpy); 502 503 int ut_utf_to_lower(struct unit_test_state *uts) 504 { 505 ut_asserteq('@', utf_to_lower('@')); 506 ut_asserteq('a', utf_to_lower('A')); 507 ut_asserteq('z', utf_to_lower('Z')); 508 ut_asserteq('[', utf_to_lower('[')); 509 ut_asserteq('m', utf_to_lower('m')); 510 /* Latin letter O with diaresis (umlaut) */ 511 ut_asserteq(0x00f6, utf_to_lower(0x00d6)); 512 #ifdef CONFIG_EFI_UNICODE_CAPITALIZATION 513 /* Cyrillic letter I*/ 514 ut_asserteq(0x0438, utf_to_lower(0x0418)); 515 #endif 516 return 0; 517 } 518 UNICODE_TEST(ut_utf_to_lower); 519 520 int ut_utf_to_upper(struct unit_test_state *uts) 521 { 522 ut_asserteq('`', utf_to_upper('`')); 523 ut_asserteq('A', utf_to_upper('a')); 524 ut_asserteq('Z', utf_to_upper('z')); 525 ut_asserteq('{', utf_to_upper('{')); 526 ut_asserteq('M', utf_to_upper('M')); 527 /* Latin letter O with diaresis (umlaut) */ 528 ut_asserteq(0x00d6, utf_to_upper(0x00f6)); 529 #ifdef CONFIG_EFI_UNICODE_CAPITALIZATION 530 /* Cyrillic letter I */ 531 ut_asserteq(0x0418, utf_to_upper(0x0438)); 532 #endif 533 return 0; 534 } 535 UNICODE_TEST(ut_utf_to_upper); 536 537 int do_ut_unicode(cmd_tbl_t *cmdtp, int flag, int argc, char * const argv[]) 538 { 539 struct unit_test *tests = ll_entry_start(struct unit_test, unicode_test); 540 const int n_ents = ll_entry_count(struct unit_test, unicode_test); 541 542 return cmd_ut_category("Unicode", tests, n_ents, argc, argv); 543 } 544