1 // SPDX-License-Identifier: GPL-2.0+ 2 /* 3 * charset conversion utils 4 * 5 * Copyright (c) 2017 Rob Clark 6 */ 7 8 #include <charset.h> 9 #include <malloc.h> 10 11 /* 12 * utf8/utf16 conversion mostly lifted from grub 13 */ 14 15 size_t u16_strlen(const u16 *in) 16 { 17 size_t i; 18 for (i = 0; in[i]; i++); 19 return i; 20 } 21 22 size_t u16_strnlen(const u16 *in, size_t count) 23 { 24 size_t i; 25 for (i = 0; count-- && in[i]; i++); 26 return i; 27 } 28 29 uint16_t *utf16_strcpy(uint16_t *dest, const uint16_t *src) 30 { 31 uint16_t *tmp = dest; 32 33 while ((*dest++ = *src++) != '\0') 34 /* nothing */; 35 return tmp; 36 37 } 38 39 uint16_t *utf16_strdup(const uint16_t *s) 40 { 41 uint16_t *new; 42 43 if (!s) 44 return NULL; 45 new = malloc((u16_strlen(s) + 1) * 2); 46 if (!new) 47 return NULL; 48 utf16_strcpy(new, s); 49 return new; 50 } 51 52 /* Convert UTF-16 to UTF-8. */ 53 uint8_t *utf16_to_utf8(uint8_t *dest, const uint16_t *src, size_t size) 54 { 55 uint32_t code_high = 0; 56 57 while (size--) { 58 uint32_t code = *src++; 59 60 if (code_high) { 61 if (code >= 0xDC00 && code <= 0xDFFF) { 62 /* Surrogate pair. */ 63 code = ((code_high - 0xD800) << 10) + (code - 0xDC00) + 0x10000; 64 65 *dest++ = (code >> 18) | 0xF0; 66 *dest++ = ((code >> 12) & 0x3F) | 0x80; 67 *dest++ = ((code >> 6) & 0x3F) | 0x80; 68 *dest++ = (code & 0x3F) | 0x80; 69 } else { 70 /* Error... */ 71 *dest++ = '?'; 72 /* *src may be valid. Don't eat it. */ 73 src--; 74 } 75 76 code_high = 0; 77 } else { 78 if (code <= 0x007F) { 79 *dest++ = code; 80 } else if (code <= 0x07FF) { 81 *dest++ = (code >> 6) | 0xC0; 82 *dest++ = (code & 0x3F) | 0x80; 83 } else if (code >= 0xD800 && code <= 0xDBFF) { 84 code_high = code; 85 continue; 86 } else if (code >= 0xDC00 && code <= 0xDFFF) { 87 /* Error... */ 88 *dest++ = '?'; 89 } else if (code < 0x10000) { 90 *dest++ = (code >> 12) | 0xE0; 91 *dest++ = ((code >> 6) & 0x3F) | 0x80; 92 *dest++ = (code & 0x3F) | 0x80; 93 } else { 94 *dest++ = (code >> 18) | 0xF0; 95 *dest++ = ((code >> 12) & 0x3F) | 0x80; 96 *dest++ = ((code >> 6) & 0x3F) | 0x80; 97 *dest++ = (code & 0x3F) | 0x80; 98 } 99 } 100 } 101 102 return dest; 103 } 104 105 uint16_t *utf8_to_utf16(uint16_t *dest, const uint8_t *src, size_t size) 106 { 107 while (size--) { 108 int extension_bytes; 109 uint32_t code; 110 111 extension_bytes = 0; 112 if (*src <= 0x7f) { 113 code = *src++; 114 /* Exit on zero byte */ 115 if (!code) 116 size = 0; 117 } else if (*src <= 0xbf) { 118 /* Illegal code */ 119 code = '?'; 120 } else if (*src <= 0xdf) { 121 code = *src++ & 0x1f; 122 extension_bytes = 1; 123 } else if (*src <= 0xef) { 124 code = *src++ & 0x0f; 125 extension_bytes = 2; 126 } else if (*src <= 0xf7) { 127 code = *src++ & 0x07; 128 extension_bytes = 3; 129 } else { 130 /* Illegal code */ 131 code = '?'; 132 } 133 134 for (; extension_bytes && size; --size, --extension_bytes) { 135 if ((*src & 0xc0) == 0x80) { 136 code <<= 6; 137 code |= *src++ & 0x3f; 138 } else { 139 /* Illegal code */ 140 code = '?'; 141 ++src; 142 --size; 143 break; 144 } 145 } 146 147 if (code < 0x10000) { 148 *dest++ = code; 149 } else { 150 /* 151 * Simplified expression for 152 * (((code - 0x10000) >> 10) & 0x3ff) | 0xd800 153 */ 154 *dest++ = (code >> 10) + 0xd7c0; 155 *dest++ = (code & 0x3ff) | 0xdc00; 156 } 157 } 158 return dest; 159 } 160