1 // SPDX-License-Identifier: GPL-2.0+ 2 /* 3 * charset conversion utils 4 * 5 * Copyright (c) 2017 Rob Clark 6 */ 7 8 #include <charset.h> 9 #include <malloc.h> 10 11 /* 12 * utf8/utf16 conversion mostly lifted from grub 13 */ 14 15 size_t utf16_strlen(const uint16_t *in) 16 { 17 size_t i; 18 for (i = 0; in[i]; i++); 19 return i; 20 } 21 22 size_t utf16_strnlen(const uint16_t *in, size_t count) 23 { 24 size_t i; 25 for (i = 0; count-- && in[i]; i++); 26 return i; 27 } 28 29 uint16_t *utf16_strcpy(uint16_t *dest, const uint16_t *src) 30 { 31 uint16_t *tmp = dest; 32 33 while ((*dest++ = *src++) != '\0') 34 /* nothing */; 35 return tmp; 36 37 } 38 39 uint16_t *utf16_strdup(const uint16_t *s) 40 { 41 uint16_t *new; 42 if (!s || !(new = malloc((utf16_strlen(s) + 1) * 2))) 43 return NULL; 44 utf16_strcpy(new, s); 45 return new; 46 } 47 48 /* Convert UTF-16 to UTF-8. */ 49 uint8_t *utf16_to_utf8(uint8_t *dest, const uint16_t *src, size_t size) 50 { 51 uint32_t code_high = 0; 52 53 while (size--) { 54 uint32_t code = *src++; 55 56 if (code_high) { 57 if (code >= 0xDC00 && code <= 0xDFFF) { 58 /* Surrogate pair. */ 59 code = ((code_high - 0xD800) << 10) + (code - 0xDC00) + 0x10000; 60 61 *dest++ = (code >> 18) | 0xF0; 62 *dest++ = ((code >> 12) & 0x3F) | 0x80; 63 *dest++ = ((code >> 6) & 0x3F) | 0x80; 64 *dest++ = (code & 0x3F) | 0x80; 65 } else { 66 /* Error... */ 67 *dest++ = '?'; 68 /* *src may be valid. Don't eat it. */ 69 src--; 70 } 71 72 code_high = 0; 73 } else { 74 if (code <= 0x007F) { 75 *dest++ = code; 76 } else if (code <= 0x07FF) { 77 *dest++ = (code >> 6) | 0xC0; 78 *dest++ = (code & 0x3F) | 0x80; 79 } else if (code >= 0xD800 && code <= 0xDBFF) { 80 code_high = code; 81 continue; 82 } else if (code >= 0xDC00 && code <= 0xDFFF) { 83 /* Error... */ 84 *dest++ = '?'; 85 } else if (code < 0x10000) { 86 *dest++ = (code >> 12) | 0xE0; 87 *dest++ = ((code >> 6) & 0x3F) | 0x80; 88 *dest++ = (code & 0x3F) | 0x80; 89 } else { 90 *dest++ = (code >> 18) | 0xF0; 91 *dest++ = ((code >> 12) & 0x3F) | 0x80; 92 *dest++ = ((code >> 6) & 0x3F) | 0x80; 93 *dest++ = (code & 0x3F) | 0x80; 94 } 95 } 96 } 97 98 return dest; 99 } 100 101 uint16_t *utf8_to_utf16(uint16_t *dest, const uint8_t *src, size_t size) 102 { 103 while (size--) { 104 int extension_bytes; 105 uint32_t code; 106 107 extension_bytes = 0; 108 if (*src <= 0x7f) { 109 code = *src++; 110 /* Exit on zero byte */ 111 if (!code) 112 size = 0; 113 } else if (*src <= 0xbf) { 114 /* Illegal code */ 115 code = '?'; 116 } else if (*src <= 0xdf) { 117 code = *src++ & 0x1f; 118 extension_bytes = 1; 119 } else if (*src <= 0xef) { 120 code = *src++ & 0x0f; 121 extension_bytes = 2; 122 } else if (*src <= 0xf7) { 123 code = *src++ & 0x07; 124 extension_bytes = 3; 125 } else { 126 /* Illegal code */ 127 code = '?'; 128 } 129 130 for (; extension_bytes && size; --size, --extension_bytes) { 131 if ((*src & 0xc0) == 0x80) { 132 code <<= 6; 133 code |= *src++ & 0x3f; 134 } else { 135 /* Illegal code */ 136 code = '?'; 137 ++src; 138 --size; 139 break; 140 } 141 } 142 143 if (code < 0x10000) { 144 *dest++ = code; 145 } else { 146 /* 147 * Simplified expression for 148 * (((code - 0x10000) >> 10) & 0x3ff) | 0xd800 149 */ 150 *dest++ = (code >> 10) + 0xd7c0; 151 *dest++ = (code & 0x3ff) | 0xdc00; 152 } 153 } 154 return dest; 155 } 156