// SPDX-License-Identifier: GPL-2.0+
/*
 *  charset conversion utils
 *
 *  Copyright (c) 2017 Rob Clark
 */

#include <common.h>
#include <charset.h>
#include <capitalization.h>
#include <malloc.h>

13b5130a81SHeinrich Schuchardt static struct capitalization_table capitalization_table[] =
14b5130a81SHeinrich Schuchardt #ifdef CONFIG_EFI_UNICODE_CAPITALIZATION
15b5130a81SHeinrich Schuchardt UNICODE_CAPITALIZATION_TABLE;
16b5130a81SHeinrich Schuchardt #elif CONFIG_FAT_DEFAULT_CODEPAGE == 1250
17b5130a81SHeinrich Schuchardt CP1250_CAPITALIZATION_TABLE;
18b5130a81SHeinrich Schuchardt #else
19b5130a81SHeinrich Schuchardt CP437_CAPITALIZATION_TABLE;
20b5130a81SHeinrich Schuchardt #endif
21b5130a81SHeinrich Schuchardt
2235cbb796SHeinrich Schuchardt /**
2335cbb796SHeinrich Schuchardt * get_code() - read Unicode code point from UTF-8 stream
2435cbb796SHeinrich Schuchardt *
2535cbb796SHeinrich Schuchardt * @read_u8: - stream reader
2635cbb796SHeinrich Schuchardt * @src: - string buffer passed to stream reader, optional
2735cbb796SHeinrich Schuchardt * Return: - Unicode code point
2878178bb0SRob Clark */
get_code(u8 (* read_u8)(void * data),void * data)2935cbb796SHeinrich Schuchardt static int get_code(u8 (*read_u8)(void *data), void *data)
3035cbb796SHeinrich Schuchardt {
3135cbb796SHeinrich Schuchardt s32 ch = 0;
3235cbb796SHeinrich Schuchardt
3335cbb796SHeinrich Schuchardt ch = read_u8(data);
3435cbb796SHeinrich Schuchardt if (!ch)
3535cbb796SHeinrich Schuchardt return 0;
3635cbb796SHeinrich Schuchardt if (ch >= 0xc2 && ch <= 0xf4) {
3735cbb796SHeinrich Schuchardt int code = 0;
3835cbb796SHeinrich Schuchardt
3935cbb796SHeinrich Schuchardt if (ch >= 0xe0) {
4035cbb796SHeinrich Schuchardt if (ch >= 0xf0) {
41d8c28232SHeinrich Schuchardt /* 0xf0 - 0xf4 */
4235cbb796SHeinrich Schuchardt ch &= 0x07;
4335cbb796SHeinrich Schuchardt code = ch << 18;
4435cbb796SHeinrich Schuchardt ch = read_u8(data);
4535cbb796SHeinrich Schuchardt if (ch < 0x80 || ch > 0xbf)
4635cbb796SHeinrich Schuchardt goto error;
4735cbb796SHeinrich Schuchardt ch &= 0x3f;
48d8c28232SHeinrich Schuchardt } else {
49d8c28232SHeinrich Schuchardt /* 0xe0 - 0xef */
5035cbb796SHeinrich Schuchardt ch &= 0x0f;
51d8c28232SHeinrich Schuchardt }
5235cbb796SHeinrich Schuchardt code += ch << 12;
53d8c28232SHeinrich Schuchardt if ((code >= 0xD800 && code <= 0xDFFF) ||
54d8c28232SHeinrich Schuchardt code >= 0x110000)
5535cbb796SHeinrich Schuchardt goto error;
5635cbb796SHeinrich Schuchardt ch = read_u8(data);
5735cbb796SHeinrich Schuchardt if (ch < 0x80 || ch > 0xbf)
5835cbb796SHeinrich Schuchardt goto error;
59d8c28232SHeinrich Schuchardt }
60d8c28232SHeinrich Schuchardt /* 0xc0 - 0xdf or continuation byte (0x80 - 0xbf) */
6135cbb796SHeinrich Schuchardt ch &= 0x3f;
6235cbb796SHeinrich Schuchardt code += ch << 6;
6335cbb796SHeinrich Schuchardt ch = read_u8(data);
6435cbb796SHeinrich Schuchardt if (ch < 0x80 || ch > 0xbf)
6535cbb796SHeinrich Schuchardt goto error;
6635cbb796SHeinrich Schuchardt ch &= 0x3f;
6735cbb796SHeinrich Schuchardt ch += code;
6835cbb796SHeinrich Schuchardt } else if (ch >= 0x80) {
6935cbb796SHeinrich Schuchardt goto error;
70d8c28232SHeinrich Schuchardt }
7135cbb796SHeinrich Schuchardt return ch;
7235cbb796SHeinrich Schuchardt error:
7335cbb796SHeinrich Schuchardt return '?';
7435cbb796SHeinrich Schuchardt }
7535cbb796SHeinrich Schuchardt
7635cbb796SHeinrich Schuchardt /**
7735cbb796SHeinrich Schuchardt * read_string() - read byte from character string
7835cbb796SHeinrich Schuchardt *
7935cbb796SHeinrich Schuchardt * @data: - pointer to string
8035cbb796SHeinrich Schuchardt * Return: - byte read
8135cbb796SHeinrich Schuchardt *
8235cbb796SHeinrich Schuchardt * The string pointer is incremented if it does not point to '\0'.
8335cbb796SHeinrich Schuchardt */
read_string(void * data)8435cbb796SHeinrich Schuchardt static u8 read_string(void *data)
8535cbb796SHeinrich Schuchardt
8635cbb796SHeinrich Schuchardt {
8735cbb796SHeinrich Schuchardt const char **src = (const char **)data;
8835cbb796SHeinrich Schuchardt u8 c;
8935cbb796SHeinrich Schuchardt
9035cbb796SHeinrich Schuchardt if (!src || !*src || !**src)
9135cbb796SHeinrich Schuchardt return 0;
9235cbb796SHeinrich Schuchardt c = **src;
93d8c28232SHeinrich Schuchardt ++*src;
9435cbb796SHeinrich Schuchardt return c;
9535cbb796SHeinrich Schuchardt }
9635cbb796SHeinrich Schuchardt
9735cbb796SHeinrich Schuchardt /**
9835cbb796SHeinrich Schuchardt * read_console() - read byte from console
9935cbb796SHeinrich Schuchardt *
10060d79876SHeinrich Schuchardt * @data - not used, needed to match interface
10160d79876SHeinrich Schuchardt * Return: - byte read or 0 on error
10235cbb796SHeinrich Schuchardt */
read_console(void * data)10335cbb796SHeinrich Schuchardt static u8 read_console(void *data)
10435cbb796SHeinrich Schuchardt {
10560d79876SHeinrich Schuchardt int ch;
10660d79876SHeinrich Schuchardt
10760d79876SHeinrich Schuchardt ch = getc();
10860d79876SHeinrich Schuchardt if (ch < 0)
10960d79876SHeinrich Schuchardt ch = 0;
11060d79876SHeinrich Schuchardt return ch;
11135cbb796SHeinrich Schuchardt }
11235cbb796SHeinrich Schuchardt
/**
 * console_read_unicode() - read Unicode code point from console
 *
 * @code:	- pointer to store the code point read
 * Return:	- 1 if tstc() reports that no input is available (@code is
 *		  not written), 0 after a code point has been stored
 */
int console_read_unicode(s32 *code)
{
	if (!tstc()) {
		/* No input available */
		return 1;
	}

	/* Read Unicode code */
	*code = get_code(read_console, NULL);
	return 0;
}

utf8_get(const char ** src)12535cbb796SHeinrich Schuchardt s32 utf8_get(const char **src)
12635cbb796SHeinrich Schuchardt {
12735cbb796SHeinrich Schuchardt return get_code(read_string, src);
128d8c28232SHeinrich Schuchardt }
129d8c28232SHeinrich Schuchardt
/**
 * utf8_put() - write UTF-8 code point to buffer
 *
 * Writes one to four bytes. No terminating '\0' is written.
 *
 * @code:	- code point
 * @dst:	- pointer to the output pointer; the output pointer is advanced
 *		  past the bytes written
 * Return:	- 0 on success, -1 if @dst or the output pointer is NULL or
 *		  @code is negative, a surrogate (0xD800 - 0xDFFF) or
 *		  above 0x10FFFF; nothing is written in the error case
 */
int utf8_put(s32 code, char **dst)
{
	char *pos;

	if (!dst || !*dst)
		return -1;
	/* A negative value must not be emitted as a truncated byte */
	if (code < 0 || (code >= 0xD800 && code <= 0xDFFF) ||
	    code >= 0x110000)
		return -1;

	pos = *dst;
	if (code <= 0x007F) {
		*pos++ = code;
	} else if (code <= 0x07FF) {
		*pos++ = code >> 6 | 0xC0;
		*pos++ = (code & 0x3F) | 0x80;
	} else if (code < 0x10000) {
		*pos++ = code >> 12 | 0xE0;
		*pos++ = (code >> 6 & 0x3F) | 0x80;
		*pos++ = (code & 0x3F) | 0x80;
	} else {
		*pos++ = code >> 18 | 0xF0;
		*pos++ = (code >> 12 & 0x3F) | 0x80;
		*pos++ = (code >> 6 & 0x3F) | 0x80;
		*pos++ = (code & 0x3F) | 0x80;
	}
	*dst = pos;
	return 0;
}

utf8_utf16_strnlen(const char * src,size_t count)159d8c28232SHeinrich Schuchardt size_t utf8_utf16_strnlen(const char *src, size_t count)
160d8c28232SHeinrich Schuchardt {
161d8c28232SHeinrich Schuchardt size_t len = 0;
162d8c28232SHeinrich Schuchardt
163d8c28232SHeinrich Schuchardt for (; *src && count; --count) {
164d8c28232SHeinrich Schuchardt s32 code = utf8_get(&src);
165d8c28232SHeinrich Schuchardt
166d8c28232SHeinrich Schuchardt if (!code)
167d8c28232SHeinrich Schuchardt break;
168d8c28232SHeinrich Schuchardt if (code < 0) {
169d8c28232SHeinrich Schuchardt /* Reserve space for a replacement character */
170d8c28232SHeinrich Schuchardt len += 1;
171d8c28232SHeinrich Schuchardt } else if (code < 0x10000) {
172d8c28232SHeinrich Schuchardt len += 1;
173d8c28232SHeinrich Schuchardt } else {
174d8c28232SHeinrich Schuchardt len += 2;
175d8c28232SHeinrich Schuchardt }
176d8c28232SHeinrich Schuchardt }
177d8c28232SHeinrich Schuchardt return len;
178d8c28232SHeinrich Schuchardt }
179d8c28232SHeinrich Schuchardt
/**
 * utf8_utf16_strncpy() - copy UTF-8 string to UTF-16 string
 *
 * A terminating 0 is always written. On return the output pointer points to
 * this terminator.
 *
 * @dst:	- pointer to the output pointer
 * @src:	- UTF-8 string
 * @count:	- maximum number of code points to copy
 * Return:	- 0 on success, -1 if @src, @dst or the output pointer is NULL
 */
int utf8_utf16_strncpy(u16 **dst, const char *src, size_t count)
{
	if (!src || !dst || !*dst)
		return -1;

	for (; count && *src; --count) {
		s32 code = utf8_get(&src);

		/* Substitute a replacement character for an error value */
		if (code < 0)
			code = '?';
		utf16_put(code, dst);
	}
	**dst = 0;
	return 0;
}

utf16_get(const u16 ** src)196d8c28232SHeinrich Schuchardt s32 utf16_get(const u16 **src)
197d8c28232SHeinrich Schuchardt {
198d8c28232SHeinrich Schuchardt s32 code, code2;
199d8c28232SHeinrich Schuchardt
200d8c28232SHeinrich Schuchardt if (!src || !*src)
201d8c28232SHeinrich Schuchardt return -1;
202d8c28232SHeinrich Schuchardt if (!**src)
203d8c28232SHeinrich Schuchardt return 0;
204d8c28232SHeinrich Schuchardt code = **src;
205d8c28232SHeinrich Schuchardt ++*src;
206d8c28232SHeinrich Schuchardt if (code >= 0xDC00 && code <= 0xDFFF)
207d8c28232SHeinrich Schuchardt return -1;
208d8c28232SHeinrich Schuchardt if (code >= 0xD800 && code <= 0xDBFF) {
209d8c28232SHeinrich Schuchardt if (!**src)
210d8c28232SHeinrich Schuchardt return -1;
211d8c28232SHeinrich Schuchardt code &= 0x3ff;
212d8c28232SHeinrich Schuchardt code <<= 10;
213d8c28232SHeinrich Schuchardt code += 0x10000;
214d8c28232SHeinrich Schuchardt code2 = **src;
215d8c28232SHeinrich Schuchardt ++*src;
216d8c28232SHeinrich Schuchardt if (code2 <= 0xDC00 || code2 >= 0xDFFF)
217d8c28232SHeinrich Schuchardt return -1;
218d8c28232SHeinrich Schuchardt code2 &= 0x3ff;
219d8c28232SHeinrich Schuchardt code += code2;
220d8c28232SHeinrich Schuchardt }
221d8c28232SHeinrich Schuchardt return code;
222d8c28232SHeinrich Schuchardt }
223d8c28232SHeinrich Schuchardt
/**
 * utf16_put() - write UTF-16 code point to buffer
 *
 * Writes one u16, or two for code points from 0x10000. No terminating 0 is
 * written.
 *
 * @code:	- code point
 * @dst:	- pointer to the output pointer; the output pointer is advanced
 *		  past the units written
 * Return:	- 0 on success, -1 if @dst or the output pointer is NULL or
 *		  @code is negative, a surrogate (0xD800 - 0xDFFF) or
 *		  above 0x10FFFF; nothing is written in the error case
 */
int utf16_put(s32 code, u16 **dst)
{
	if (!dst || !*dst)
		return -1;
	/* A negative value must not be emitted as a truncated u16 */
	if (code < 0 || (code >= 0xD800 && code <= 0xDFFF) ||
	    code >= 0x110000)
		return -1;
	if (code < 0x10000) {
		**dst = code;
	} else {
		/* Encode as high surrogate followed by low surrogate */
		code -= 0x10000;
		**dst = code >> 10 | 0xD800;
		++*dst;
		**dst = (code & 0x3ff) | 0xDC00;
	}
	++*dst;
	return 0;
}

utf16_strnlen(const u16 * src,size_t count)242d8c28232SHeinrich Schuchardt size_t utf16_strnlen(const u16 *src, size_t count)
243d8c28232SHeinrich Schuchardt {
244d8c28232SHeinrich Schuchardt size_t len = 0;
245d8c28232SHeinrich Schuchardt
246d8c28232SHeinrich Schuchardt for (; *src && count; --count) {
247d8c28232SHeinrich Schuchardt s32 code = utf16_get(&src);
248d8c28232SHeinrich Schuchardt
249d8c28232SHeinrich Schuchardt if (!code)
250d8c28232SHeinrich Schuchardt break;
251d8c28232SHeinrich Schuchardt /*
252d8c28232SHeinrich Schuchardt * In case of an illegal sequence still reserve space for a
253d8c28232SHeinrich Schuchardt * replacement character.
254d8c28232SHeinrich Schuchardt */
255d8c28232SHeinrich Schuchardt ++len;
256d8c28232SHeinrich Schuchardt }
257d8c28232SHeinrich Schuchardt return len;
258d8c28232SHeinrich Schuchardt }
259d8c28232SHeinrich Schuchardt
utf16_utf8_strnlen(const u16 * src,size_t count)260d8c28232SHeinrich Schuchardt size_t utf16_utf8_strnlen(const u16 *src, size_t count)
261d8c28232SHeinrich Schuchardt {
262d8c28232SHeinrich Schuchardt size_t len = 0;
263d8c28232SHeinrich Schuchardt
264d8c28232SHeinrich Schuchardt for (; *src && count; --count) {
265d8c28232SHeinrich Schuchardt s32 code = utf16_get(&src);
266d8c28232SHeinrich Schuchardt
267d8c28232SHeinrich Schuchardt if (!code)
268d8c28232SHeinrich Schuchardt break;
269d8c28232SHeinrich Schuchardt if (code < 0)
270d8c28232SHeinrich Schuchardt /* Reserve space for a replacement character */
271d8c28232SHeinrich Schuchardt len += 1;
272d8c28232SHeinrich Schuchardt else if (code < 0x80)
273d8c28232SHeinrich Schuchardt len += 1;
274d8c28232SHeinrich Schuchardt else if (code < 0x800)
275d8c28232SHeinrich Schuchardt len += 2;
276d8c28232SHeinrich Schuchardt else if (code < 0x10000)
277d8c28232SHeinrich Schuchardt len += 3;
278d8c28232SHeinrich Schuchardt else
279d8c28232SHeinrich Schuchardt len += 4;
280d8c28232SHeinrich Schuchardt }
281d8c28232SHeinrich Schuchardt return len;
282d8c28232SHeinrich Schuchardt }
283d8c28232SHeinrich Schuchardt
utf16_utf8_strncpy(char ** dst,const u16 * src,size_t count)284d8c28232SHeinrich Schuchardt int utf16_utf8_strncpy(char **dst, const u16 *src, size_t count)
285d8c28232SHeinrich Schuchardt {
286d8c28232SHeinrich Schuchardt if (!src || !dst || !*dst)
287d8c28232SHeinrich Schuchardt return -1;
288d8c28232SHeinrich Schuchardt
289d8c28232SHeinrich Schuchardt for (; count && *src; --count) {
290d8c28232SHeinrich Schuchardt s32 code = utf16_get(&src);
291d8c28232SHeinrich Schuchardt
292d8c28232SHeinrich Schuchardt if (code < 0)
293d8c28232SHeinrich Schuchardt code = '?';
294d8c28232SHeinrich Schuchardt utf8_put(code, dst);
295d8c28232SHeinrich Schuchardt }
296d8c28232SHeinrich Schuchardt **dst = 0;
297d8c28232SHeinrich Schuchardt return 0;
298d8c28232SHeinrich Schuchardt }
299d8c28232SHeinrich Schuchardt
utf_to_lower(const s32 code)300b5130a81SHeinrich Schuchardt s32 utf_to_lower(const s32 code)
301b5130a81SHeinrich Schuchardt {
302b5130a81SHeinrich Schuchardt struct capitalization_table *pos = capitalization_table;
303b5130a81SHeinrich Schuchardt s32 ret = code;
304b5130a81SHeinrich Schuchardt
305b5130a81SHeinrich Schuchardt if (code <= 0x7f) {
306b5130a81SHeinrich Schuchardt if (code >= 'A' && code <= 'Z')
307b5130a81SHeinrich Schuchardt ret += 0x20;
308b5130a81SHeinrich Schuchardt return ret;
309b5130a81SHeinrich Schuchardt }
310b5130a81SHeinrich Schuchardt for (; pos->upper; ++pos) {
311b5130a81SHeinrich Schuchardt if (pos->upper == code) {
312b5130a81SHeinrich Schuchardt ret = pos->lower;
313b5130a81SHeinrich Schuchardt break;
314b5130a81SHeinrich Schuchardt }
315b5130a81SHeinrich Schuchardt }
316b5130a81SHeinrich Schuchardt return ret;
317b5130a81SHeinrich Schuchardt }
318b5130a81SHeinrich Schuchardt
utf_to_upper(const s32 code)319b5130a81SHeinrich Schuchardt s32 utf_to_upper(const s32 code)
320b5130a81SHeinrich Schuchardt {
321b5130a81SHeinrich Schuchardt struct capitalization_table *pos = capitalization_table;
322b5130a81SHeinrich Schuchardt s32 ret = code;
323b5130a81SHeinrich Schuchardt
324b5130a81SHeinrich Schuchardt if (code <= 0x7f) {
325b5130a81SHeinrich Schuchardt if (code >= 'a' && code <= 'z')
326b5130a81SHeinrich Schuchardt ret -= 0x20;
327b5130a81SHeinrich Schuchardt return ret;
328b5130a81SHeinrich Schuchardt }
329b5130a81SHeinrich Schuchardt for (; pos->lower; ++pos) {
330b5130a81SHeinrich Schuchardt if (pos->lower == code) {
331b5130a81SHeinrich Schuchardt ret = pos->upper;
332b5130a81SHeinrich Schuchardt break;
333b5130a81SHeinrich Schuchardt }
334b5130a81SHeinrich Schuchardt }
335b5130a81SHeinrich Schuchardt return ret;
336b5130a81SHeinrich Schuchardt }
33778178bb0SRob Clark
/**
 * u16_strlen() - count non-zero words
 *
 * @in:		- 0-terminated u16 string, must not be NULL
 * Return:	- number of u16 words before the terminating 0
 */
size_t u16_strlen(const u16 *in)
{
	size_t i = 0;

	while (in[i])
		i++;
	return i;
}

/**
 * u16_strnlen() - count non-zero words, bounded
 *
 * @in:		- u16 string, must not be NULL
 * @count:	- maximum number of words to examine
 * Return:	- number of u16 words before the first 0, at most @count
 */
size_t u16_strnlen(const u16 *in, size_t count)
{
	size_t i = 0;

	while (i < count && in[i])
		i++;
	return i;
}

/**
 * u16_strcpy() - copy u16 string
 *
 * Copies words from @src to @dest up to and including the terminating 0.
 * No bounds or NULL checks are performed.
 *
 * @dest:	- destination buffer
 * @src:	- 0-terminated source string
 * Return:	- @dest
 */
u16 *u16_strcpy(u16 *dest, const u16 *src)
{
	u16 *tmp = dest;

	for (;; dest++, src++) {
		*dest = *src;
		/* The terminator has just been copied */
		if (!*src)
			break;
	}

	return tmp;
}

/**
 * u16_strdup() - duplicate u16 string
 *
 * The copy is allocated with malloc(); releasing it is left to the caller.
 *
 * @src:	- 0-terminated source string
 * Return:	- newly allocated copy including the terminating 0, NULL if
 *		  @src is NULL or the allocation fails
 */
u16 *u16_strdup(const u16 *src)
{
	u16 *new;

	if (!src)
		return NULL;

	/* One extra word for the terminating 0 */
	new = malloc((u16_strlen(src) + 1) * sizeof(*new));
	if (!new)
		return NULL;

	u16_strcpy(new, src);

	return new;
}

/**
 * utf16_to_utf8() - convert UTF-16 to UTF-8
 *
 * Exactly @size units are read from @src; a 0 unit is converted like any
 * other value and does not stop the conversion. No terminator is written to
 * @dest. Each unpaired surrogate is replaced by '?'.
 *
 * @dest:	- output buffer; up to 3 bytes are written per input unit
 * @src:	- UTF-16 input
 * @size:	- number of uint16_t units to convert
 * Return:	- pointer to the byte following the last byte written
 */
uint8_t *utf16_to_utf8(uint8_t *dest, const uint16_t *src, size_t size)
{
	uint32_t code_high = 0;

	while (size--) {
		uint32_t code = *src++;

		if (code_high) {
			if (code >= 0xDC00 && code <= 0xDFFF) {
				/* Surrogate pair. */
				code = ((code_high - 0xD800) << 10) +
				       (code - 0xDC00) + 0x10000;

				*dest++ = (code >> 18) | 0xF0;
				*dest++ = ((code >> 12) & 0x3F) | 0x80;
				*dest++ = ((code >> 6) & 0x3F) | 0x80;
				*dest++ = (code & 0x3F) | 0x80;
				code_high = 0;
				continue;
			}
			/*
			 * Unpaired high surrogate. The current unit may be
			 * valid, so it is converted below instead of being
			 * re-read, which would cost one unit of @size.
			 */
			*dest++ = '?';
			code_high = 0;
		}

		if (code <= 0x007F) {
			*dest++ = code;
		} else if (code <= 0x07FF) {
			*dest++ = (code >> 6) | 0xC0;
			*dest++ = (code & 0x3F) | 0x80;
		} else if (code >= 0xD800 && code <= 0xDBFF) {
			/* High surrogate: wait for the low surrogate */
			code_high = code;
		} else if (code >= 0xDC00 && code <= 0xDFFF) {
			/* Low surrogate without preceding high surrogate */
			*dest++ = '?';
		} else {
			/* Remaining values of a 16 bit unit need 3 bytes */
			*dest++ = (code >> 12) | 0xE0;
			*dest++ = ((code >> 6) & 0x3F) | 0x80;
			*dest++ = (code & 0x3F) | 0x80;
		}
	}

	/* High surrogate at the very end of the input */
	if (code_high)
		*dest++ = '?';

	return dest;
}