1f739fcd8STom Rini /* SPDX-License-Identifier: GPL-2.0+ */ 278178bb0SRob Clark /* 378178bb0SRob Clark * charset conversion utils 478178bb0SRob Clark * 578178bb0SRob Clark * Copyright (c) 2017 Rob Clark 678178bb0SRob Clark */ 778178bb0SRob Clark 878178bb0SRob Clark #ifndef __CHARSET_H_ 978178bb0SRob Clark #define __CHARSET_H_ 1078178bb0SRob Clark 11d8c28232SHeinrich Schuchardt #include <linux/kernel.h> 12f58c5ecbSHeinrich Schuchardt #include <linux/types.h> 13f58c5ecbSHeinrich Schuchardt 14984f251fSHeinrich Schuchardt #define MAX_UTF8_PER_UTF16 3 1578178bb0SRob Clark 1678178bb0SRob Clark /** 1735cbb796SHeinrich Schuchardt * console_read_unicode() - read Unicode code point from console 1835cbb796SHeinrich Schuchardt * 1935cbb796SHeinrich Schuchardt * @code: pointer to store Unicode code point 2035cbb796SHeinrich Schuchardt * Return: 0 = success 2135cbb796SHeinrich Schuchardt */ 2235cbb796SHeinrich Schuchardt int console_read_unicode(s32 *code); 2335cbb796SHeinrich Schuchardt 2435cbb796SHeinrich Schuchardt /** 25d8c28232SHeinrich Schuchardt * utf8_get() - get next UTF-8 code point from buffer 26d8c28232SHeinrich Schuchardt * 27d8c28232SHeinrich Schuchardt * @src: pointer to current byte, updated to point to next byte 28d8c28232SHeinrich Schuchardt * Return: code point, or 0 for end of string, or -1 if no legal 29d8c28232SHeinrich Schuchardt * code point is found. In case of an error src points to 30d8c28232SHeinrich Schuchardt * the incorrect byte. 31d8c28232SHeinrich Schuchardt */ 32d8c28232SHeinrich Schuchardt s32 utf8_get(const char **src); 33d8c28232SHeinrich Schuchardt 34d8c28232SHeinrich Schuchardt /** 35d8c28232SHeinrich Schuchardt * utf8_put() - write UTF-8 code point to buffer 36d8c28232SHeinrich Schuchardt * 37d8c28232SHeinrich Schuchardt * @code: code point 38d8c28232SHeinrich Schuchardt * @dst: pointer to destination buffer, updated to next position 39d8c28232SHeinrich Schuchardt * Return: -1 if the input parameters are invalid 40d8c28232SHeinrich Schuchardt */ 41d8c28232SHeinrich Schuchardt int utf8_put(s32 code, char **dst); 42d8c28232SHeinrich Schuchardt 43d8c28232SHeinrich Schuchardt /** 44d8c28232SHeinrich Schuchardt * utf8_utf16_strnlen() - length of a truncated utf-8 string after conversion 45d8c28232SHeinrich Schuchardt * to utf-16 46d8c28232SHeinrich Schuchardt * 47d8c28232SHeinrich Schuchardt * @src: utf-8 string 48d8c28232SHeinrich Schuchardt * @count: maximum number of code points to convert 49d8c28232SHeinrich Schuchardt * Return: length in bytes after conversion to utf-16 without the 50d8c28232SHeinrich Schuchardt * trailing \0. If an invalid UTF-8 sequence is hit one 51d8c28232SHeinrich Schuchardt * word will be reserved for a replacement character. 52d8c28232SHeinrich Schuchardt */ 53d8c28232SHeinrich Schuchardt size_t utf8_utf16_strnlen(const char *src, size_t count); 54d8c28232SHeinrich Schuchardt 55d8c28232SHeinrich Schuchardt /** 56d8c28232SHeinrich Schuchardt * utf8_utf16_strlen() - length of a utf-8 string after conversion to utf-16 57d8c28232SHeinrich Schuchardt * 58d8c28232SHeinrich Schuchardt * @src: utf-8 string 59d8c28232SHeinrich Schuchardt * Return: length in bytes after conversion to utf-16 without the 60d8c28232SHeinrich Schuchardt * trailing \0. -1 if the utf-8 string is not valid. 61d8c28232SHeinrich Schuchardt */ 62d8c28232SHeinrich Schuchardt #define utf8_utf16_strlen(a) utf8_utf16_strnlen((a), SIZE_MAX) 63d8c28232SHeinrich Schuchardt 64d8c28232SHeinrich Schuchardt /** 65d8c28232SHeinrich Schuchardt * utf8_utf16_strncpy() - copy utf-8 string to utf-16 string 66d8c28232SHeinrich Schuchardt * 67d8c28232SHeinrich Schuchardt * @dst: destination buffer 68d8c28232SHeinrich Schuchardt * @src: source buffer 69d8c28232SHeinrich Schuchardt * @count: maximum number of code points to copy 70d8c28232SHeinrich Schuchardt * Return: -1 if the input parameters are invalid 71d8c28232SHeinrich Schuchardt */ 72d8c28232SHeinrich Schuchardt int utf8_utf16_strncpy(u16 **dst, const char *src, size_t count); 73d8c28232SHeinrich Schuchardt 74d8c28232SHeinrich Schuchardt /** 75d8c28232SHeinrich Schuchardt * utf8_utf16_strcpy() - copy utf-8 string to utf-16 string 76d8c28232SHeinrich Schuchardt * 77d8c28232SHeinrich Schuchardt * @dst: destination buffer 78d8c28232SHeinrich Schuchardt * @src: source buffer 79d8c28232SHeinrich Schuchardt * Return: -1 if the input parameters are invalid 80d8c28232SHeinrich Schuchardt */ 81d8c28232SHeinrich Schuchardt #define utf8_utf16_strcpy(d, s) utf8_utf16_strncpy((d), (s), SIZE_MAX) 82d8c28232SHeinrich Schuchardt 83d8c28232SHeinrich Schuchardt /** 84d8c28232SHeinrich Schuchardt * utf16_get() - get next UTF-16 code point from buffer 85d8c28232SHeinrich Schuchardt * 86d8c28232SHeinrich Schuchardt * @src: pointer to current word, updated to point to next word 87d8c28232SHeinrich Schuchardt * Return: code point, or 0 for end of string, or -1 if no legal 88d8c28232SHeinrich Schuchardt * code point is found. In case of an error src points to 89d8c28232SHeinrich Schuchardt * the incorrect word. 90d8c28232SHeinrich Schuchardt */ 91d8c28232SHeinrich Schuchardt s32 utf16_get(const u16 **src); 92d8c28232SHeinrich Schuchardt 93d8c28232SHeinrich Schuchardt /** 94d8c28232SHeinrich Schuchardt * utf16_put() - write UTF-16 code point to buffer 95d8c28232SHeinrich Schuchardt * 96d8c28232SHeinrich Schuchardt * @code: code point 97d8c28232SHeinrich Schuchardt * @dst: pointer to destination buffer, updated to next position 98d8c28232SHeinrich Schuchardt * Return: -1 if the input parameters are invalid 99d8c28232SHeinrich Schuchardt */ 100d8c28232SHeinrich Schuchardt int utf16_put(s32 code, u16 **dst); 101d8c28232SHeinrich Schuchardt 102d8c28232SHeinrich Schuchardt /** 103d8c28232SHeinrich Schuchardt * utf16_strnlen() - length of a truncated utf-16 string 104d8c28232SHeinrich Schuchardt * 105d8c28232SHeinrich Schuchardt * @src: utf-16 string 106d8c28232SHeinrich Schuchardt * @count: maximum number of code points to convert 107d8c28232SHeinrich Schuchardt * Return: length in code points. If an invalid UTF-16 sequence is 108d8c28232SHeinrich Schuchardt * hit one position will be reserved for a replacement 109d8c28232SHeinrich Schuchardt * character. 110d8c28232SHeinrich Schuchardt */ 111d8c28232SHeinrich Schuchardt size_t utf16_strnlen(const u16 *src, size_t count); 112d8c28232SHeinrich Schuchardt 113d8c28232SHeinrich Schuchardt /** 114d8c28232SHeinrich Schuchardt * utf16_utf8_strnlen() - length of a truncated utf-16 string after conversion 115d8c28232SHeinrich Schuchardt * to utf-8 116d8c28232SHeinrich Schuchardt * 117d8c28232SHeinrich Schuchardt * @src: utf-16 string 118d8c28232SHeinrich Schuchardt * @count: maximum number of code points to convert 119d8c28232SHeinrich Schuchardt * Return: length in bytes after conversion to utf-8 without the 120d8c28232SHeinrich Schuchardt * trailing \0. If an invalid UTF-16 sequence is hit one 121d8c28232SHeinrich Schuchardt * byte will be reserved for a replacement character. 122d8c28232SHeinrich Schuchardt */ 123d8c28232SHeinrich Schuchardt size_t utf16_utf8_strnlen(const u16 *src, size_t count); 124d8c28232SHeinrich Schuchardt 125d8c28232SHeinrich Schuchardt /** 126d8c28232SHeinrich Schuchardt * utf16_utf8_strlen() - length of a utf-16 string after conversion to utf-8 127d8c28232SHeinrich Schuchardt * 128d8c28232SHeinrich Schuchardt * @src: utf-16 string 129d8c28232SHeinrich Schuchardt * Return: length in bytes after conversion to utf-8 without the 130d8c28232SHeinrich Schuchardt * trailing \0. -1 if the utf-16 string is not valid. 131d8c28232SHeinrich Schuchardt */ 132d8c28232SHeinrich Schuchardt #define utf16_utf8_strlen(a) utf16_utf8_strnlen((a), SIZE_MAX) 133d8c28232SHeinrich Schuchardt 134d8c28232SHeinrich Schuchardt /** 135d8c28232SHeinrich Schuchardt * utf16_utf8_strncpy() - copy utf-16 string to utf-8 string 136d8c28232SHeinrich Schuchardt * 137d8c28232SHeinrich Schuchardt * @dst: destination buffer 138d8c28232SHeinrich Schuchardt * @src: source buffer 139d8c28232SHeinrich Schuchardt * @count: maximum number of code points to copy 140d8c28232SHeinrich Schuchardt * Return: -1 if the input parameters are invalid 141d8c28232SHeinrich Schuchardt */ 142d8c28232SHeinrich Schuchardt int utf16_utf8_strncpy(char **dst, const u16 *src, size_t count); 143d8c28232SHeinrich Schuchardt 144d8c28232SHeinrich Schuchardt /** 145d8c28232SHeinrich Schuchardt * utf16_utf8_strcpy() - copy utf-16 string to utf-8 string 146d8c28232SHeinrich Schuchardt * 147d8c28232SHeinrich Schuchardt * @dst: destination buffer 148d8c28232SHeinrich Schuchardt * @src: source buffer 149d8c28232SHeinrich Schuchardt * Return: -1 if the input parameters are invalid 150d8c28232SHeinrich Schuchardt */ 151d8c28232SHeinrich Schuchardt #define utf16_utf8_strcpy(d, s) utf16_utf8_strncpy((d), (s), SIZE_MAX) 152d8c28232SHeinrich Schuchardt 153d8c28232SHeinrich Schuchardt /** 154b5130a81SHeinrich Schuchardt * utf_to_lower() - convert a Unicode letter to lower case 155b5130a81SHeinrich Schuchardt * 156b5130a81SHeinrich Schuchardt * @code: letter to convert 157b5130a81SHeinrich Schuchardt * Return: lower case letter or unchanged letter 158b5130a81SHeinrich Schuchardt */ 159b5130a81SHeinrich Schuchardt s32 utf_to_lower(const s32 code); 160b5130a81SHeinrich Schuchardt 161b5130a81SHeinrich Schuchardt /** 162b5130a81SHeinrich Schuchardt * utf_to_upper() - convert a Unicode letter to upper case 163b5130a81SHeinrich Schuchardt * 164b5130a81SHeinrich Schuchardt * @code: letter to convert 165b5130a81SHeinrich Schuchardt * Return: upper case letter or unchanged letter 166b5130a81SHeinrich Schuchardt */ 167b5130a81SHeinrich Schuchardt s32 utf_to_upper(const s32 code); 168b5130a81SHeinrich Schuchardt 169b5130a81SHeinrich Schuchardt /** 1701dde0d57SHeinrich Schuchardt * u16_strlen - count non-zero words 17178178bb0SRob Clark * 1721dde0d57SHeinrich Schuchardt * This function matches wsclen() if the -fshort-wchar compiler flag is set. 1731dde0d57SHeinrich Schuchardt * In the EFI context we explicitly need a function handling u16 strings. 17478178bb0SRob Clark * 1751dde0d57SHeinrich Schuchardt * @in: null terminated u16 string 1761dde0d57SHeinrich Schuchardt * ReturnValue: number of non-zero words. 1771dde0d57SHeinrich Schuchardt * This is not the number of utf-16 letters! 17878178bb0SRob Clark */ 1791dde0d57SHeinrich Schuchardt size_t u16_strlen(const u16 *in); 18078178bb0SRob Clark 18178178bb0SRob Clark /** 1821dde0d57SHeinrich Schuchardt * u16_strlen - count non-zero words 18378178bb0SRob Clark * 1841dde0d57SHeinrich Schuchardt * This function matches wscnlen_s() if the -fshort-wchar compiler flag is set. 1851dde0d57SHeinrich Schuchardt * In the EFI context we explicitly need a function handling u16 strings. 18678178bb0SRob Clark * 1871dde0d57SHeinrich Schuchardt * @in: null terminated u16 string 1881dde0d57SHeinrich Schuchardt * @count: maximum number of words to count 1891dde0d57SHeinrich Schuchardt * ReturnValue: number of non-zero words. 1901dde0d57SHeinrich Schuchardt * This is not the number of utf-16 letters! 19178178bb0SRob Clark */ 1921dde0d57SHeinrich Schuchardt size_t u16_strnlen(const u16 *in, size_t count); 19378178bb0SRob Clark 19478178bb0SRob Clark /** 195*2a3537aeSAkashi, Takahiro * u16_strcpy() - copy u16 string 196*2a3537aeSAkashi, Takahiro * 197*2a3537aeSAkashi, Takahiro * Copy u16 string pointed to by src, including terminating null word, to 198*2a3537aeSAkashi, Takahiro * the buffer pointed to by dest. 199*2a3537aeSAkashi, Takahiro * 200*2a3537aeSAkashi, Takahiro * @dest: destination buffer 201*2a3537aeSAkashi, Takahiro * @src: source buffer (null terminated) 202*2a3537aeSAkashi, Takahiro * Return: 'dest' address 203*2a3537aeSAkashi, Takahiro */ 204*2a3537aeSAkashi, Takahiro u16 *u16_strcpy(u16 *dest, const u16 *src); 205*2a3537aeSAkashi, Takahiro 206*2a3537aeSAkashi, Takahiro /** 207*2a3537aeSAkashi, Takahiro * u16_strdup() - duplicate u16 string 208*2a3537aeSAkashi, Takahiro * 209*2a3537aeSAkashi, Takahiro * Copy u16 string pointed to by src, including terminating null word, to a 210*2a3537aeSAkashi, Takahiro * newly allocated buffer. 211*2a3537aeSAkashi, Takahiro * 212*2a3537aeSAkashi, Takahiro * @src: source buffer (null terminated) 213*2a3537aeSAkashi, Takahiro * Return: allocated new buffer on success, NULL on failure 214*2a3537aeSAkashi, Takahiro */ 215*2a3537aeSAkashi, Takahiro u16 *u16_strdup(const u16 *src); 216*2a3537aeSAkashi, Takahiro 217*2a3537aeSAkashi, Takahiro /** 21878178bb0SRob Clark * utf16_to_utf8() - Convert an utf16 string to utf8 21978178bb0SRob Clark * 22078178bb0SRob Clark * Converts 'size' characters of the utf16 string 'src' to utf8 22178178bb0SRob Clark * written to the 'dest' buffer. 22278178bb0SRob Clark * 223984f251fSHeinrich Schuchardt * NOTE that a single utf16 character can generate up to 3 utf8 22478178bb0SRob Clark * characters. See MAX_UTF8_PER_UTF16. 22578178bb0SRob Clark * 22678178bb0SRob Clark * @dest the destination buffer to write the utf8 characters 22778178bb0SRob Clark * @src the source utf16 string 22878178bb0SRob Clark * @size the number of utf16 characters to convert 22978178bb0SRob Clark * @return the pointer to the first unwritten byte in 'dest' 23078178bb0SRob Clark */ 23178178bb0SRob Clark uint8_t *utf16_to_utf8(uint8_t *dest, const uint16_t *src, size_t size); 23278178bb0SRob Clark 23378178bb0SRob Clark #endif /* __CHARSET_H_ */ 234