1 /* SPDX-License-Identifier: GPL-2.0+ */ 2 /* 3 * charset conversion utils 4 * 5 * Copyright (c) 2017 Rob Clark 6 */ 7 8 #ifndef __CHARSET_H_ 9 #define __CHARSET_H_ 10 11 #include <linux/kernel.h> 12 #include <linux/types.h> 13 14 #define MAX_UTF8_PER_UTF16 3 15 16 /** 17 * utf8_get() - get next UTF-8 code point from buffer 18 * 19 * @src: pointer to current byte, updated to point to next byte 20 * Return: code point, or 0 for end of string, or -1 if no legal 21 * code point is found. In case of an error src points to 22 * the incorrect byte. 23 */ 24 s32 utf8_get(const char **src); 25 26 /** 27 * utf8_put() - write UTF-8 code point to buffer 28 * 29 * @code: code point 30 * @dst: pointer to destination buffer, updated to next position 31 * Return: -1 if the input parameters are invalid 32 */ 33 int utf8_put(s32 code, char **dst); 34 35 /** 36 * utf8_utf16_strnlen() - length of a truncated utf-8 string after conversion 37 * to utf-16 38 * 39 * @src: utf-8 string 40 * @count: maximum number of code points to convert 41 * Return: length in bytes after conversion to utf-16 without the 42 * trailing \0. If an invalid UTF-8 sequence is hit one 43 * word will be reserved for a replacement character. 44 */ 45 size_t utf8_utf16_strnlen(const char *src, size_t count); 46 47 /** 48 * utf8_utf16_strlen() - length of a utf-8 string after conversion to utf-16 49 * 50 * @src: utf-8 string 51 * Return: length in bytes after conversion to utf-16 without the 52 * trailing \0. -1 if the utf-8 string is not valid. 53 */ 54 #define utf8_utf16_strlen(a) utf8_utf16_strnlen((a), SIZE_MAX) 55 56 /** 57 * utf8_utf16_strncpy() - copy utf-8 string to utf-16 string 58 * 59 * @dst: destination buffer 60 * @src: source buffer 61 * @count: maximum number of code points to copy 62 * Return: -1 if the input parameters are invalid 63 */ 64 int utf8_utf16_strncpy(u16 **dst, const char *src, size_t count); 65 66 /** 67 * utf8_utf16_strcpy() - copy utf-8 string to utf-16 string 68 * 69 * @dst: destination buffer 70 * @src: source buffer 71 * Return: -1 if the input parameters are invalid 72 */ 73 #define utf8_utf16_strcpy(d, s) utf8_utf16_strncpy((d), (s), SIZE_MAX) 74 75 /** 76 * utf16_get() - get next UTF-16 code point from buffer 77 * 78 * @src: pointer to current word, updated to point to next word 79 * Return: code point, or 0 for end of string, or -1 if no legal 80 * code point is found. In case of an error src points to 81 * the incorrect word. 82 */ 83 s32 utf16_get(const u16 **src); 84 85 /** 86 * utf16_put() - write UTF-16 code point to buffer 87 * 88 * @code: code point 89 * @dst: pointer to destination buffer, updated to next position 90 * Return: -1 if the input parameters are invalid 91 */ 92 int utf16_put(s32 code, u16 **dst); 93 94 /** 95 * utf16_strnlen() - length of a truncated utf-16 string 96 * 97 * @src: utf-16 string 98 * @count: maximum number of code points to convert 99 * Return: length in code points. If an invalid UTF-16 sequence is 100 * hit one position will be reserved for a replacement 101 * character. 102 */ 103 size_t utf16_strnlen(const u16 *src, size_t count); 104 105 /** 106 * utf16_utf8_strnlen() - length of a truncated utf-16 string after conversion 107 * to utf-8 108 * 109 * @src: utf-16 string 110 * @count: maximum number of code points to convert 111 * Return: length in bytes after conversion to utf-8 without the 112 * trailing \0. If an invalid UTF-16 sequence is hit one 113 * byte will be reserved for a replacement character. 114 */ 115 size_t utf16_utf8_strnlen(const u16 *src, size_t count); 116 117 /** 118 * utf16_utf8_strlen() - length of a utf-16 string after conversion to utf-8 119 * 120 * @src: utf-16 string 121 * Return: length in bytes after conversion to utf-8 without the 122 * trailing \0. -1 if the utf-16 string is not valid. 123 */ 124 #define utf16_utf8_strlen(a) utf16_utf8_strnlen((a), SIZE_MAX) 125 126 /** 127 * utf16_utf8_strncpy() - copy utf-16 string to utf-8 string 128 * 129 * @dst: destination buffer 130 * @src: source buffer 131 * @count: maximum number of code points to copy 132 * Return: -1 if the input parameters are invalid 133 */ 134 int utf16_utf8_strncpy(char **dst, const u16 *src, size_t count); 135 136 /** 137 * utf16_utf8_strcpy() - copy utf-16 string to utf-8 string 138 * 139 * @dst: destination buffer 140 * @src: source buffer 141 * Return: -1 if the input parameters are invalid 142 */ 143 #define utf16_utf8_strcpy(d, s) utf16_utf8_strncpy((d), (s), SIZE_MAX) 144 145 /** 146 * u16_strlen - count non-zero words 147 * 148 * This function matches wsclen() if the -fshort-wchar compiler flag is set. 149 * In the EFI context we explicitly need a function handling u16 strings. 150 * 151 * @in: null terminated u16 string 152 * ReturnValue: number of non-zero words. 153 * This is not the number of utf-16 letters! 154 */ 155 size_t u16_strlen(const u16 *in); 156 157 /** 158 * u16_strlen - count non-zero words 159 * 160 * This function matches wscnlen_s() if the -fshort-wchar compiler flag is set. 161 * In the EFI context we explicitly need a function handling u16 strings. 162 * 163 * @in: null terminated u16 string 164 * @count: maximum number of words to count 165 * ReturnValue: number of non-zero words. 166 * This is not the number of utf-16 letters! 167 */ 168 size_t u16_strnlen(const u16 *in, size_t count); 169 170 /** 171 * utf16_to_utf8() - Convert an utf16 string to utf8 172 * 173 * Converts 'size' characters of the utf16 string 'src' to utf8 174 * written to the 'dest' buffer. 175 * 176 * NOTE that a single utf16 character can generate up to 3 utf8 177 * characters. See MAX_UTF8_PER_UTF16. 178 * 179 * @dest the destination buffer to write the utf8 characters 180 * @src the source utf16 string 181 * @size the number of utf16 characters to convert 182 * @return the pointer to the first unwritten byte in 'dest' 183 */ 184 uint8_t *utf16_to_utf8(uint8_t *dest, const uint16_t *src, size_t size); 185 186 #endif /* __CHARSET_H_ */ 187