xref: /openbmc/u-boot/lib/charset.c (revision 82e0646a)
1 /*
2  *  charset conversion utils
3  *
4  *  Copyright (c) 2017 Rob Clark
5  *
6  *  SPDX-License-Identifier:     GPL-2.0+
7  */
8 
9 #include <common.h>
10 #include <charset.h>
11 #include <malloc.h>
12 
13 /*
14  * utf8/utf16 conversion mostly lifted from grub
15  */
16 
/*
 * Count the 16-bit code units in a zero-terminated UTF-16 string.
 *
 * @in:		string to measure; must end with a 0x0000 code unit
 * Return:	number of code units preceding the terminator. This is a
 *		unit count, not a character count: a surrogate pair
 *		contributes two.
 */
size_t utf16_strlen(const uint16_t *in)
{
	const uint16_t *end = in;

	/* Walk forward until the terminating zero unit is reached. */
	while (*end)
		end++;

	return end - in;
}
23 
/*
 * Bounded variant of utf16_strlen().
 *
 * @in:		string to measure
 * @count:	maximum number of code units to examine
 * Return:	number of code units before the first zero unit, or @count
 *		if no zero unit occurs within the first @count units
 *
 * The bound is tested before each unit is read, so nothing at or beyond
 * in[count] is ever accessed (and @in is not read at all when @count is 0).
 */
size_t utf16_strnlen(const uint16_t *in, size_t count)
{
	size_t len = 0;

	while (len < count && in[len])
		len++;

	return len;
}
30 
/*
 * Copy a zero-terminated UTF-16 string, terminator included.
 *
 * @dest:	destination buffer; no bounds checking is done, so it must
 *		have room for the whole string plus the terminating unit
 * @src:	zero-terminated source string
 * Return:	@dest
 */
uint16_t *utf16_strcpy(uint16_t *dest, const uint16_t *src)
{
	size_t i = 0;
	uint16_t unit;

	/* Copy unit by unit; the terminator is copied before the loop exits. */
	do {
		unit = src[i];
		dest[i++] = unit;
	} while (unit);

	return dest;
}
40 
/*
 * Duplicate a zero-terminated UTF-16 string into memory from malloc().
 *
 * @s:		string to duplicate; may be NULL
 * Return:	newly allocated copy of @s, or NULL if @s is NULL or the
 *		allocation fails
 */
uint16_t *utf16_strdup(const uint16_t *s)
{
	uint16_t *copy;

	if (!s)
		return NULL;

	/* One extra code unit for the terminating zero. */
	copy = malloc((utf16_strlen(s) + 1) * sizeof(*copy));
	if (!copy)
		return NULL;

	utf16_strcpy(copy, s);

	return copy;
}
49 
/*
 * Convert UTF-16 to UTF-8.
 *
 * @dest:	output buffer. No bounds checking is performed and no
 *		terminating NUL is written. The worst case is three output
 *		bytes per input code unit.
 * @src:	UTF-16 input; embedded zero units are converted like any
 *		other code unit
 * @size:	number of 16-bit code units to convert from @src
 * Return:	pointer to the byte following the last byte written
 *
 * Every unpaired surrogate (a low surrogate without a preceding high one,
 * a high surrogate not followed by a low one, or a high surrogate at the
 * very end of the input) is replaced by a single '?'.
 */
uint8_t *utf16_to_utf8(uint8_t *dest, const uint16_t *src, size_t size)
{
	uint32_t code_high = 0;

	while (size--) {
		uint32_t code = *src++;

		if (code_high) {
			if (code >= 0xDC00 && code <= 0xDFFF) {
				/* Surrogate pair.  */
				code = ((code_high - 0xD800) << 10) + (code - 0xDC00) + 0x10000;

				*dest++ = (code >> 18) | 0xF0;
				*dest++ = ((code >> 12) & 0x3F) | 0x80;
				*dest++ = ((code >> 6) & 0x3F) | 0x80;
				*dest++ = (code & 0x3F) | 0x80;
			} else {
				/* Error: the pending high surrogate is unpaired.  */
				*dest++ = '?';
				/*
				 * The current unit may be valid on its own, so
				 * push it back to be decoded by the next
				 * iteration. The count has to be restored as
				 * well, otherwise this unit is charged twice
				 * and the last unit of the input is dropped.
				 */
				src--;
				size++;
			}

			code_high = 0;
		} else {
			if (code <= 0x007F) {
				*dest++ = code;
			} else if (code <= 0x07FF) {
				*dest++ = (code >> 6) | 0xC0;
				*dest++ = (code & 0x3F) | 0x80;
			} else if (code >= 0xD800 && code <= 0xDBFF) {
				/* High surrogate: wait for the matching low half.  */
				code_high = code;
				continue;
			} else if (code >= 0xDC00 && code <= 0xDFFF) {
				/* Error: low surrogate without a high one.  */
				*dest++ = '?';
			} else {
				/*
				 * Remaining BMP code points. A single 16-bit
				 * unit can never exceed 0xFFFF, so no
				 * four-byte form is needed here.
				 */
				*dest++ = (code >> 12) | 0xE0;
				*dest++ = ((code >> 6) & 0x3F) | 0x80;
				*dest++ = (code & 0x3F) | 0x80;
			}
		}
	}

	/* Input ended in the middle of a surrogate pair.  */
	if (code_high)
		*dest++ = '?';

	return dest;
}
102