19f806850SThomas Gleixner /* SPDX-License-Identifier: GPL-2.0-only */ 244594c2fSOlaf Weber /* 344594c2fSOlaf Weber * Copyright (c) 2014 SGI. 444594c2fSOlaf Weber * All rights reserved. 544594c2fSOlaf Weber */ 644594c2fSOlaf Weber 744594c2fSOlaf Weber #ifndef UTF8NORM_H 844594c2fSOlaf Weber #define UTF8NORM_H 944594c2fSOlaf Weber 1044594c2fSOlaf Weber #include <linux/types.h> 1144594c2fSOlaf Weber #include <linux/export.h> 1244594c2fSOlaf Weber #include <linux/string.h> 1344594c2fSOlaf Weber #include <linux/module.h> 1449bd03ccSChristoph Hellwig #include <linux/unicode.h> 1544594c2fSOlaf Weber 16*2b3d0478SChristoph Hellwig int utf8version_is_supported(const struct unicode_map *um, unsigned int version); 1744594c2fSOlaf Weber 1844594c2fSOlaf Weber /* 1944594c2fSOlaf Weber * Determine the length of the normalized from of the string, 2044594c2fSOlaf Weber * excluding any terminating NULL byte. 2144594c2fSOlaf Weber * Returns 0 if only ignorable code points are present. 2244594c2fSOlaf Weber * Returns -1 if the input is not valid UTF-8. 2344594c2fSOlaf Weber */ 246ca99ce7SChristoph Hellwig ssize_t utf8nlen(const struct unicode_map *um, enum utf8_normalization n, 256ca99ce7SChristoph Hellwig const char *s, size_t len); 2644594c2fSOlaf Weber 27a8384c68SOlaf Weber /* Needed in struct utf8cursor below. */ 28a8384c68SOlaf Weber #define UTF8HANGULLEAF (12) 29a8384c68SOlaf Weber 3044594c2fSOlaf Weber /* 3144594c2fSOlaf Weber * Cursor structure used by the normalizer. 3244594c2fSOlaf Weber */ 3344594c2fSOlaf Weber struct utf8cursor { 346ca99ce7SChristoph Hellwig const struct unicode_map *um; 356ca99ce7SChristoph Hellwig enum utf8_normalization n; 3644594c2fSOlaf Weber const char *s; 3744594c2fSOlaf Weber const char *p; 3844594c2fSOlaf Weber const char *ss; 3944594c2fSOlaf Weber const char *sp; 4044594c2fSOlaf Weber unsigned int len; 4144594c2fSOlaf Weber unsigned int slen; 4244594c2fSOlaf Weber short int ccc; 4344594c2fSOlaf Weber short int nccc; 44a8384c68SOlaf Weber unsigned char hangul[UTF8HANGULLEAF]; 4544594c2fSOlaf Weber }; 4644594c2fSOlaf Weber 4744594c2fSOlaf Weber /* 4844594c2fSOlaf Weber * Initialize a utf8cursor to normalize a string. 4944594c2fSOlaf Weber * Returns 0 on success. 5044594c2fSOlaf Weber * Returns -1 on failure. 5144594c2fSOlaf Weber */ 526ca99ce7SChristoph Hellwig int utf8ncursor(struct utf8cursor *u8c, const struct unicode_map *um, 536ca99ce7SChristoph Hellwig enum utf8_normalization n, const char *s, size_t len); 5444594c2fSOlaf Weber 5544594c2fSOlaf Weber /* 5644594c2fSOlaf Weber * Get the next byte in the normalization. 5744594c2fSOlaf Weber * Returns a value > 0 && < 256 on success. 5844594c2fSOlaf Weber * Returns 0 when the end of the normalization is reached. 5944594c2fSOlaf Weber * Returns -1 if the string being normalized is not valid UTF-8. 6044594c2fSOlaf Weber */ 6144594c2fSOlaf Weber extern int utf8byte(struct utf8cursor *u8c); 6244594c2fSOlaf Weber 63*2b3d0478SChristoph Hellwig struct utf8data { 64*2b3d0478SChristoph Hellwig unsigned int maxage; 65*2b3d0478SChristoph Hellwig unsigned int offset; 66*2b3d0478SChristoph Hellwig }; 67*2b3d0478SChristoph Hellwig 68*2b3d0478SChristoph Hellwig struct utf8data_table { 69*2b3d0478SChristoph Hellwig const unsigned int *utf8agetab; 70*2b3d0478SChristoph Hellwig int utf8agetab_size; 71*2b3d0478SChristoph Hellwig 72*2b3d0478SChristoph Hellwig const struct utf8data *utf8nfdicfdata; 73*2b3d0478SChristoph Hellwig int utf8nfdicfdata_size; 74*2b3d0478SChristoph Hellwig 75*2b3d0478SChristoph Hellwig const struct utf8data *utf8nfdidata; 76*2b3d0478SChristoph Hellwig int utf8nfdidata_size; 77*2b3d0478SChristoph Hellwig 78*2b3d0478SChristoph Hellwig const unsigned char *utf8data; 79*2b3d0478SChristoph Hellwig }; 80*2b3d0478SChristoph Hellwig 81*2b3d0478SChristoph Hellwig extern struct utf8data_table utf8_data_table; 82*2b3d0478SChristoph Hellwig 8344594c2fSOlaf Weber #endif /* UTF8NORM_H */ 84