1 /* 2 * Copyright (c) 2014 SGI. 3 * All rights reserved. 4 * 5 * This program is free software; you can redistribute it and/or 6 * modify it under the terms of the GNU General Public License as 7 * published by the Free Software Foundation. 8 * 9 * This program is distributed in the hope that it would be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 * GNU General Public License for more details. 13 * 14 */ 15 16 #ifndef UTF8NORM_H 17 #define UTF8NORM_H 18 19 #include <linux/types.h> 20 #include <linux/export.h> 21 #include <linux/string.h> 22 #include <linux/module.h> 23 24 /* Encoding a unicode version number as a single unsigned int. */ 25 #define UNICODE_MAJ_SHIFT (16) 26 #define UNICODE_MIN_SHIFT (8) 27 28 #define UNICODE_AGE(MAJ, MIN, REV) \ 29 (((unsigned int)(MAJ) << UNICODE_MAJ_SHIFT) | \ 30 ((unsigned int)(MIN) << UNICODE_MIN_SHIFT) | \ 31 ((unsigned int)(REV))) 32 33 /* Highest unicode version supported by the data tables. */ 34 extern int utf8version_is_supported(u8 maj, u8 min, u8 rev); 35 36 /* 37 * Look for the correct const struct utf8data for a unicode version. 38 * Returns NULL if the version requested is too new. 39 * 40 * Two normalization forms are supported: nfdi and nfdicf. 41 * 42 * nfdi: 43 * - Apply unicode normalization form NFD. 44 * - Remove any Default_Ignorable_Code_Point. 45 * 46 * nfdicf: 47 * - Apply unicode normalization form NFD. 48 * - Remove any Default_Ignorable_Code_Point. 49 * - Apply a full casefold (C + F). 50 */ 51 extern const struct utf8data *utf8nfdi(unsigned int maxage); 52 extern const struct utf8data *utf8nfdicf(unsigned int maxage); 53 54 /* 55 * Determine the maximum age of any unicode character in the string. 56 * Returns 0 if only unassigned code points are present. 57 * Returns -1 if the input is not valid UTF-8. 58 */ 59 extern int utf8agemax(const struct utf8data *data, const char *s); 60 extern int utf8nagemax(const struct utf8data *data, const char *s, size_t len); 61 62 /* 63 * Determine the minimum age of any unicode character in the string. 64 * Returns 0 if any unassigned code points are present. 65 * Returns -1 if the input is not valid UTF-8. 66 */ 67 extern int utf8agemin(const struct utf8data *data, const char *s); 68 extern int utf8nagemin(const struct utf8data *data, const char *s, size_t len); 69 70 /* 71 * Determine the length of the normalized from of the string, 72 * excluding any terminating NULL byte. 73 * Returns 0 if only ignorable code points are present. 74 * Returns -1 if the input is not valid UTF-8. 75 */ 76 extern ssize_t utf8len(const struct utf8data *data, const char *s); 77 extern ssize_t utf8nlen(const struct utf8data *data, const char *s, size_t len); 78 79 /* 80 * Cursor structure used by the normalizer. 81 */ 82 struct utf8cursor { 83 const struct utf8data *data; 84 const char *s; 85 const char *p; 86 const char *ss; 87 const char *sp; 88 unsigned int len; 89 unsigned int slen; 90 short int ccc; 91 short int nccc; 92 }; 93 94 /* 95 * Initialize a utf8cursor to normalize a string. 96 * Returns 0 on success. 97 * Returns -1 on failure. 98 */ 99 extern int utf8cursor(struct utf8cursor *u8c, const struct utf8data *data, 100 const char *s); 101 extern int utf8ncursor(struct utf8cursor *u8c, const struct utf8data *data, 102 const char *s, size_t len); 103 104 /* 105 * Get the next byte in the normalization. 106 * Returns a value > 0 && < 256 on success. 107 * Returns 0 when the end of the normalization is reached. 108 * Returns -1 if the string being normalized is not valid UTF-8. 109 */ 110 extern int utf8byte(struct utf8cursor *u8c); 111 112 #endif /* UTF8NORM_H */ 113