xref: /openbmc/u-boot/lib/charset.c (revision e087905a48ee8042ffdd9531b6aada6be2751ed1)
1  // SPDX-License-Identifier: GPL-2.0+
2  /*
3   *  charset conversion utils
4   *
5   *  Copyright (c) 2017 Rob Clark
6   */
7  
8  #include <charset.h>
9  #include <malloc.h>
10  
11  /*
12   * utf8/utf16 conversion mostly lifted from grub
13   */
14  
/*
 * Count the 16-bit units in a zero-terminated UTF-16 string.
 *
 * @in:		string to measure; must end with a 0x0000 unit
 * Return:	number of units preceding the terminator (surrogate pairs
 *		count as two units)
 */
size_t utf16_strlen(const uint16_t *in)
{
	const uint16_t *end = in;

	/* Walk forward until the terminating zero unit is reached */
	while (*end != 0)
		end++;

	return (size_t)(end - in);
}
21  
/*
 * Count the 16-bit units in a UTF-16 string, looking at no more than
 * @count units.
 *
 * @in:		string to measure
 * @count:	maximum number of units to examine; when 0, @in is not read
 * Return:	number of units before the first zero unit, or @count if no
 *		zero unit was found within the first @count units
 */
size_t utf16_strnlen(const uint16_t *in, size_t count)
{
	size_t len = 0;

	/* The bound is tested first so in[count] is never dereferenced */
	while (len < count && in[len] != 0)
		len++;

	return len;
}
28  
/*
 * Copy a zero-terminated UTF-16 string, terminator included.
 *
 * @dest:	destination buffer; the function performs no bounds check, so
 *		it must have room for every unit of @src plus the terminator
 * @src:	zero-terminated source string
 * Return:	@dest
 */
uint16_t *utf16_strcpy(uint16_t *dest, const uint16_t *src)
{
	size_t pos = 0;
	uint16_t unit;

	/* Copy unit by unit; the terminating zero is copied before stopping */
	do {
		unit = src[pos];
		dest[pos] = unit;
		pos++;
	} while (unit != 0);

	return dest;
}
38  
/*
 * Duplicate a zero-terminated UTF-16 string into newly allocated memory.
 *
 * @s:		string to duplicate; may be NULL
 * Return:	pointer to a malloc()ed copy of @s, or NULL if @s is NULL or
 *		the allocation failed
 */
uint16_t *utf16_strdup(const uint16_t *s)
{
	uint16_t *copy;

	if (s == NULL)
		return NULL;

	/* One extra unit for the terminating zero */
	copy = malloc((utf16_strlen(s) + 1) * sizeof(*copy));
	if (copy == NULL)
		return NULL;

	utf16_strcpy(copy, s);
	return copy;
}
47  
/*
 * Convert UTF-16 to UTF-8.
 *
 * Exactly @size 16-bit units are read from @src; a zero unit does not stop
 * the conversion, it is emitted as a zero byte. No terminator is appended.
 *
 * Malformed input is replaced rather than rejected:
 *  - a high surrogate (0xD800..0xDBFF) that is not followed by a low
 *    surrogate produces '?', and the following unit is then converted on
 *    its own;
 *  - a low surrogate (0xDC00..0xDFFF) without a preceding high surrogate
 *    produces '?';
 *  - a high surrogate that is the very last unit of the input produces no
 *    output.
 *
 * @dest:	output buffer; no bounds check is performed. The conversion
 *		writes at most 3 bytes per input unit.
 * @src:	UTF-16 input
 * @size:	number of 16-bit units to convert
 * Return:	pointer to the byte following the last byte written
 */
uint8_t *utf16_to_utf8(uint8_t *dest, const uint16_t *src, size_t size)
{
	uint32_t code_high = 0;

	while (size--) {
		uint32_t code = *src++;

		if (code_high) {
			if (code >= 0xDC00 && code <= 0xDFFF) {
				/* Surrogate pair.  */
				code = ((code_high - 0xD800) << 10) +
				       (code - 0xDC00) + 0x10000;

				*dest++ = (code >> 18) | 0xF0;
				*dest++ = ((code >> 12) & 0x3F) | 0x80;
				*dest++ = ((code >> 6) & 0x3F) | 0x80;
				*dest++ = (code & 0x3F) | 0x80;
			} else {
				/* Error: unpaired high surrogate.  */
				*dest++ = '?';
				/*
				 * The current unit may be valid on its own, so
				 * push it back. The loop header already counted
				 * it once; restore size so that re-reading it
				 * does not cost a second count and drop the
				 * last unit of the input.
				 */
				src--;
				size++;
			}

			code_high = 0;
		} else if (code <= 0x007F) {
			*dest++ = code;
		} else if (code <= 0x07FF) {
			*dest++ = (code >> 6) | 0xC0;
			*dest++ = (code & 0x3F) | 0x80;
		} else if (code >= 0xD800 && code <= 0xDBFF) {
			/* Remember the high surrogate, wait for its partner */
			code_high = code;
		} else if (code >= 0xDC00 && code <= 0xDFFF) {
			/* Error: low surrogate without a high surrogate */
			*dest++ = '?';
		} else {
			/*
			 * Remaining BMP code points. A single 16-bit unit can
			 * never exceed 0xFFFF, so no 4-byte case is needed
			 * here.
			 */
			*dest++ = (code >> 12) | 0xE0;
			*dest++ = ((code >> 6) & 0x3F) | 0x80;
			*dest++ = (code & 0x3F) | 0x80;
		}
	}

	return dest;
}
100  
/*
 * Convert UTF-8 to UTF-16.
 *
 * At most @size bytes are read from @src. Conversion also stops after a
 * zero byte has been read; that zero is written to @dest as a zero unit.
 * If no zero byte is met within @size bytes, no terminator is written.
 *
 * Malformed input is replaced by '?' rather than rejected:
 *  - a byte that cannot start a sequence (0x80..0xBF, 0xF8..0xFF) is
 *    consumed and replaced;
 *  - a multi-byte sequence interrupted by a byte that is not a continuation
 *    byte is replaced; the interrupting byte is NOT consumed, so a valid
 *    character or the terminating zero byte following a broken sequence is
 *    still processed normally;
 *  - a multi-byte sequence cut short by @size is replaced;
 *  - a decoded value above 0x10FFFF is replaced, as it cannot be expressed
 *    as a surrogate pair.
 * Overlong encodings and encoded surrogates are not detected.
 *
 * @dest:	output buffer; no bounds check is performed. The conversion
 *		writes at most one 16-bit unit per input byte.
 * @src:	UTF-8 input
 * @size:	maximum number of bytes to read from @src
 * Return:	pointer to the unit following the last unit written
 */
uint16_t *utf8_to_utf16(uint16_t *dest, const uint8_t *src, size_t size)
{
	while (size--) {
		int extension_bytes = 0;
		uint32_t code;

		if (*src <= 0x7f) {
			code = *src++;
			/* Exit on zero byte */
			if (!code)
				size = 0;
		} else if (*src <= 0xbf) {
			/*
			 * Illegal code: continuation byte without a lead byte.
			 * It must be consumed, otherwise the same byte would be
			 * re-read on every remaining iteration.
			 */
			code = '?';
			++src;
		} else if (*src <= 0xdf) {
			code = *src++ & 0x1f;
			extension_bytes = 1;
		} else if (*src <= 0xef) {
			code = *src++ & 0x0f;
			extension_bytes = 2;
		} else if (*src <= 0xf7) {
			code = *src++ & 0x07;
			extension_bytes = 3;
		} else {
			/* Illegal code: 0xf8..0xff never occur in UTF-8 */
			code = '?';
			++src;
		}

		for (; extension_bytes && size; --size, --extension_bytes) {
			/*
			 * Stop at a byte that is not a continuation byte and
			 * leave it for the next iteration of the outer loop:
			 * it may be a valid character or the terminating zero.
			 */
			if ((*src & 0xc0) != 0x80)
				break;
			code <<= 6;
			code |= *src++ & 0x3f;
		}

		/*
		 * extension_bytes is still non-zero if the sequence was
		 * interrupted or truncated by size; code then only holds a
		 * partially assembled value.
		 */
		if (extension_bytes || code > 0x10ffff)
			code = '?';

		if (code < 0x10000) {
			*dest++ = code;
		} else {
			/*
			 * Simplified expression for
			 * (((code - 0x10000) >> 10) & 0x3ff) | 0xd800
			 */
			*dest++ = (code >> 10) + 0xd7c0;
			*dest++ = (code & 0x3ff) | 0xdc00;
		}
	}
	return dest;
}
156