1 /* 2 * charset conversion utils 3 * 4 * Copyright (c) 2017 Rob Clark 5 * 6 * SPDX-License-Identifier: GPL-2.0+ 7 */ 8 9 #include <charset.h> 10 #include <malloc.h> 11 12 /* 13 * utf8/utf16 conversion mostly lifted from grub 14 */ 15 16 size_t utf16_strlen(const uint16_t *in) 17 { 18 size_t i; 19 for (i = 0; in[i]; i++); 20 return i; 21 } 22 23 size_t utf16_strnlen(const uint16_t *in, size_t count) 24 { 25 size_t i; 26 for (i = 0; count-- && in[i]; i++); 27 return i; 28 } 29 30 uint16_t *utf16_strcpy(uint16_t *dest, const uint16_t *src) 31 { 32 uint16_t *tmp = dest; 33 34 while ((*dest++ = *src++) != '\0') 35 /* nothing */; 36 return tmp; 37 38 } 39 40 uint16_t *utf16_strdup(const uint16_t *s) 41 { 42 uint16_t *new; 43 if (!s || !(new = malloc((utf16_strlen(s) + 1) * 2))) 44 return NULL; 45 utf16_strcpy(new, s); 46 return new; 47 } 48 49 /* Convert UTF-16 to UTF-8. */ 50 uint8_t *utf16_to_utf8(uint8_t *dest, const uint16_t *src, size_t size) 51 { 52 uint32_t code_high = 0; 53 54 while (size--) { 55 uint32_t code = *src++; 56 57 if (code_high) { 58 if (code >= 0xDC00 && code <= 0xDFFF) { 59 /* Surrogate pair. */ 60 code = ((code_high - 0xD800) << 10) + (code - 0xDC00) + 0x10000; 61 62 *dest++ = (code >> 18) | 0xF0; 63 *dest++ = ((code >> 12) & 0x3F) | 0x80; 64 *dest++ = ((code >> 6) & 0x3F) | 0x80; 65 *dest++ = (code & 0x3F) | 0x80; 66 } else { 67 /* Error... */ 68 *dest++ = '?'; 69 /* *src may be valid. Don't eat it. */ 70 src--; 71 } 72 73 code_high = 0; 74 } else { 75 if (code <= 0x007F) { 76 *dest++ = code; 77 } else if (code <= 0x07FF) { 78 *dest++ = (code >> 6) | 0xC0; 79 *dest++ = (code & 0x3F) | 0x80; 80 } else if (code >= 0xD800 && code <= 0xDBFF) { 81 code_high = code; 82 continue; 83 } else if (code >= 0xDC00 && code <= 0xDFFF) { 84 /* Error... */ 85 *dest++ = '?'; 86 } else if (code < 0x10000) { 87 *dest++ = (code >> 12) | 0xE0; 88 *dest++ = ((code >> 6) & 0x3F) | 0x80; 89 *dest++ = (code & 0x3F) | 0x80; 90 } else { 91 *dest++ = (code >> 18) | 0xF0; 92 *dest++ = ((code >> 12) & 0x3F) | 0x80; 93 *dest++ = ((code >> 6) & 0x3F) | 0x80; 94 *dest++ = (code & 0x3F) | 0x80; 95 } 96 } 97 } 98 99 return dest; 100 } 101 102 uint16_t *utf8_to_utf16(uint16_t *dest, const uint8_t *src, size_t size) 103 { 104 while (size--) { 105 int extension_bytes; 106 uint32_t code; 107 108 extension_bytes = 0; 109 if (*src <= 0x7f) { 110 code = *src++; 111 /* Exit on zero byte */ 112 if (!code) 113 size = 0; 114 } else if (*src <= 0xbf) { 115 /* Illegal code */ 116 code = '?'; 117 } else if (*src <= 0xdf) { 118 code = *src++ & 0x1f; 119 extension_bytes = 1; 120 } else if (*src <= 0xef) { 121 code = *src++ & 0x0f; 122 extension_bytes = 2; 123 } else if (*src <= 0xf7) { 124 code = *src++ & 0x07; 125 extension_bytes = 3; 126 } else { 127 /* Illegal code */ 128 code = '?'; 129 } 130 131 for (; extension_bytes && size; --size, --extension_bytes) { 132 if ((*src & 0xc0) == 0x80) { 133 code <<= 6; 134 code |= *src++ & 0x3f; 135 } else { 136 /* Illegal code */ 137 code = '?'; 138 ++src; 139 --size; 140 break; 141 } 142 } 143 144 if (code < 0x10000) { 145 *dest++ = code; 146 } else { 147 /* 148 * Simplified expression for 149 * (((code - 0x10000) >> 10) & 0x3ff) | 0xd800 150 */ 151 *dest++ = (code >> 10) + 0xd7c0; 152 *dest++ = (code & 0x3ff) | 0xdc00; 153 } 154 } 155 return dest; 156 } 157