1 /* 2 * charset conversion utils 3 * 4 * Copyright (c) 2017 Rob Clark 5 * 6 * SPDX-License-Identifier: GPL-2.0+ 7 */ 8 9 #include <common.h> 10 #include <charset.h> 11 #include <malloc.h> 12 13 /* 14 * utf8/utf16 conversion mostly lifted from grub 15 */ 16 17 size_t utf16_strlen(const uint16_t *in) 18 { 19 size_t i; 20 for (i = 0; in[i]; i++); 21 return i; 22 } 23 24 size_t utf16_strnlen(const uint16_t *in, size_t count) 25 { 26 size_t i; 27 for (i = 0; count-- && in[i]; i++); 28 return i; 29 } 30 31 uint16_t *utf16_strcpy(uint16_t *dest, const uint16_t *src) 32 { 33 uint16_t *tmp = dest; 34 35 while ((*dest++ = *src++) != '\0') 36 /* nothing */; 37 return tmp; 38 39 } 40 41 uint16_t *utf16_strdup(const uint16_t *s) 42 { 43 uint16_t *new; 44 if (!s || !(new = malloc((utf16_strlen(s) + 1) * 2))) 45 return NULL; 46 utf16_strcpy(new, s); 47 return new; 48 } 49 50 /* Convert UTF-16 to UTF-8. */ 51 uint8_t *utf16_to_utf8(uint8_t *dest, const uint16_t *src, size_t size) 52 { 53 uint32_t code_high = 0; 54 55 while (size--) { 56 uint32_t code = *src++; 57 58 if (code_high) { 59 if (code >= 0xDC00 && code <= 0xDFFF) { 60 /* Surrogate pair. */ 61 code = ((code_high - 0xD800) << 10) + (code - 0xDC00) + 0x10000; 62 63 *dest++ = (code >> 18) | 0xF0; 64 *dest++ = ((code >> 12) & 0x3F) | 0x80; 65 *dest++ = ((code >> 6) & 0x3F) | 0x80; 66 *dest++ = (code & 0x3F) | 0x80; 67 } else { 68 /* Error... */ 69 *dest++ = '?'; 70 /* *src may be valid. Don't eat it. */ 71 src--; 72 } 73 74 code_high = 0; 75 } else { 76 if (code <= 0x007F) { 77 *dest++ = code; 78 } else if (code <= 0x07FF) { 79 *dest++ = (code >> 6) | 0xC0; 80 *dest++ = (code & 0x3F) | 0x80; 81 } else if (code >= 0xD800 && code <= 0xDBFF) { 82 code_high = code; 83 continue; 84 } else if (code >= 0xDC00 && code <= 0xDFFF) { 85 /* Error... */ 86 *dest++ = '?'; 87 } else if (code < 0x10000) { 88 *dest++ = (code >> 12) | 0xE0; 89 *dest++ = ((code >> 6) & 0x3F) | 0x80; 90 *dest++ = (code & 0x3F) | 0x80; 91 } else { 92 *dest++ = (code >> 18) | 0xF0; 93 *dest++ = ((code >> 12) & 0x3F) | 0x80; 94 *dest++ = ((code >> 6) & 0x3F) | 0x80; 95 *dest++ = (code & 0x3F) | 0x80; 96 } 97 } 98 } 99 100 return dest; 101 } 102