1 /* SPDX-License-Identifier: GPL-2.0+ */ 2 /* 3 * charset conversion utils 4 * 5 * Copyright (c) 2017 Rob Clark 6 */ 7 8 #ifndef __CHARSET_H_ 9 #define __CHARSET_H_ 10 11 #include <linux/kernel.h> 12 #include <linux/types.h> 13 14 #define MAX_UTF8_PER_UTF16 3 15 16 /** 17 * console_read_unicode() - read Unicode code point from console 18 * 19 * @code: pointer to store Unicode code point 20 * Return: 0 = success 21 */ 22 int console_read_unicode(s32 *code); 23 24 /** 25 * utf8_get() - get next UTF-8 code point from buffer 26 * 27 * @src: pointer to current byte, updated to point to next byte 28 * Return: code point, or 0 for end of string, or -1 if no legal 29 * code point is found. In case of an error src points to 30 * the incorrect byte. 31 */ 32 s32 utf8_get(const char **src); 33 34 /** 35 * utf8_put() - write UTF-8 code point to buffer 36 * 37 * @code: code point 38 * @dst: pointer to destination buffer, updated to next position 39 * Return: -1 if the input parameters are invalid 40 */ 41 int utf8_put(s32 code, char **dst); 42 43 /** 44 * utf8_utf16_strnlen() - length of a truncated utf-8 string after conversion 45 * to utf-16 46 * 47 * @src: utf-8 string 48 * @count: maximum number of code points to convert 49 * Return: length in bytes after conversion to utf-16 without the 50 * trailing \0. If an invalid UTF-8 sequence is hit one 51 * word will be reserved for a replacement character. 52 */ 53 size_t utf8_utf16_strnlen(const char *src, size_t count); 54 55 /** 56 * utf8_utf16_strlen() - length of a utf-8 string after conversion to utf-16 57 * 58 * @src: utf-8 string 59 * Return: length in bytes after conversion to utf-16 without the 60 * trailing \0. -1 if the utf-8 string is not valid. 61 */ 62 #define utf8_utf16_strlen(a) utf8_utf16_strnlen((a), SIZE_MAX) 63 64 /** 65 * utf8_utf16_strncpy() - copy utf-8 string to utf-16 string 66 * 67 * @dst: destination buffer 68 * @src: source buffer 69 * @count: maximum number of code points to copy 70 * Return: -1 if the input parameters are invalid 71 */ 72 int utf8_utf16_strncpy(u16 **dst, const char *src, size_t count); 73 74 /** 75 * utf8_utf16_strcpy() - copy utf-8 string to utf-16 string 76 * 77 * @dst: destination buffer 78 * @src: source buffer 79 * Return: -1 if the input parameters are invalid 80 */ 81 #define utf8_utf16_strcpy(d, s) utf8_utf16_strncpy((d), (s), SIZE_MAX) 82 83 /** 84 * utf16_get() - get next UTF-16 code point from buffer 85 * 86 * @src: pointer to current word, updated to point to next word 87 * Return: code point, or 0 for end of string, or -1 if no legal 88 * code point is found. In case of an error src points to 89 * the incorrect word. 90 */ 91 s32 utf16_get(const u16 **src); 92 93 /** 94 * utf16_put() - write UTF-16 code point to buffer 95 * 96 * @code: code point 97 * @dst: pointer to destination buffer, updated to next position 98 * Return: -1 if the input parameters are invalid 99 */ 100 int utf16_put(s32 code, u16 **dst); 101 102 /** 103 * utf16_strnlen() - length of a truncated utf-16 string 104 * 105 * @src: utf-16 string 106 * @count: maximum number of code points to convert 107 * Return: length in code points. If an invalid UTF-16 sequence is 108 * hit one position will be reserved for a replacement 109 * character. 110 */ 111 size_t utf16_strnlen(const u16 *src, size_t count); 112 113 /** 114 * utf16_utf8_strnlen() - length of a truncated utf-16 string after conversion 115 * to utf-8 116 * 117 * @src: utf-16 string 118 * @count: maximum number of code points to convert 119 * Return: length in bytes after conversion to utf-8 without the 120 * trailing \0. If an invalid UTF-16 sequence is hit one 121 * byte will be reserved for a replacement character. 122 */ 123 size_t utf16_utf8_strnlen(const u16 *src, size_t count); 124 125 /** 126 * utf16_utf8_strlen() - length of a utf-16 string after conversion to utf-8 127 * 128 * @src: utf-16 string 129 * Return: length in bytes after conversion to utf-8 without the 130 * trailing \0. -1 if the utf-16 string is not valid. 131 */ 132 #define utf16_utf8_strlen(a) utf16_utf8_strnlen((a), SIZE_MAX) 133 134 /** 135 * utf16_utf8_strncpy() - copy utf-16 string to utf-8 string 136 * 137 * @dst: destination buffer 138 * @src: source buffer 139 * @count: maximum number of code points to copy 140 * Return: -1 if the input parameters are invalid 141 */ 142 int utf16_utf8_strncpy(char **dst, const u16 *src, size_t count); 143 144 /** 145 * utf16_utf8_strcpy() - copy utf-16 string to utf-8 string 146 * 147 * @dst: destination buffer 148 * @src: source buffer 149 * Return: -1 if the input parameters are invalid 150 */ 151 #define utf16_utf8_strcpy(d, s) utf16_utf8_strncpy((d), (s), SIZE_MAX) 152 153 /** 154 * utf_to_lower() - convert a Unicode letter to lower case 155 * 156 * @code: letter to convert 157 * Return: lower case letter or unchanged letter 158 */ 159 s32 utf_to_lower(const s32 code); 160 161 /** 162 * utf_to_upper() - convert a Unicode letter to upper case 163 * 164 * @code: letter to convert 165 * Return: upper case letter or unchanged letter 166 */ 167 s32 utf_to_upper(const s32 code); 168 169 /** 170 * u16_strlen - count non-zero words 171 * 172 * This function matches wsclen() if the -fshort-wchar compiler flag is set. 173 * In the EFI context we explicitly need a function handling u16 strings. 174 * 175 * @in: null terminated u16 string 176 * ReturnValue: number of non-zero words. 177 * This is not the number of utf-16 letters! 178 */ 179 size_t u16_strlen(const u16 *in); 180 181 /** 182 * u16_strlen - count non-zero words 183 * 184 * This function matches wscnlen_s() if the -fshort-wchar compiler flag is set. 185 * In the EFI context we explicitly need a function handling u16 strings. 186 * 187 * @in: null terminated u16 string 188 * @count: maximum number of words to count 189 * ReturnValue: number of non-zero words. 190 * This is not the number of utf-16 letters! 191 */ 192 size_t u16_strnlen(const u16 *in, size_t count); 193 194 /** 195 * utf16_to_utf8() - Convert an utf16 string to utf8 196 * 197 * Converts 'size' characters of the utf16 string 'src' to utf8 198 * written to the 'dest' buffer. 199 * 200 * NOTE that a single utf16 character can generate up to 3 utf8 201 * characters. See MAX_UTF8_PER_UTF16. 202 * 203 * @dest the destination buffer to write the utf8 characters 204 * @src the source utf16 string 205 * @size the number of utf16 characters to convert 206 * @return the pointer to the first unwritten byte in 'dest' 207 */ 208 uint8_t *utf16_to_utf8(uint8_t *dest, const uint16_t *src, size_t size); 209 210 #endif /* __CHARSET_H_ */ 211