1 /* 2 * Copyright (c) 2014 SGI. 3 * All rights reserved. 4 * 5 * This program is free software; you can redistribute it and/or 6 * modify it under the terms of the GNU General Public License as 7 * published by the Free Software Foundation. 8 * 9 * This program is distributed in the hope that it would be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 * GNU General Public License for more details. 13 * 14 */ 15 16 #ifndef UTF8NORM_H 17 #define UTF8NORM_H 18 19 #include <linux/types.h> 20 #include <linux/export.h> 21 #include <linux/string.h> 22 #include <linux/module.h> 23 24 /* Encoding a unicode version number as a single unsigned int. */ 25 #define UNICODE_MAJ_SHIFT (16) 26 #define UNICODE_MIN_SHIFT (8) 27 28 #define UNICODE_AGE(MAJ, MIN, REV) \ 29 (((unsigned int)(MAJ) << UNICODE_MAJ_SHIFT) | \ 30 ((unsigned int)(MIN) << UNICODE_MIN_SHIFT) | \ 31 ((unsigned int)(REV))) 32 33 /* Highest unicode version supported by the data tables. */ 34 extern int utf8version_is_supported(u8 maj, u8 min, u8 rev); 35 extern int utf8version_latest(void); 36 37 /* 38 * Look for the correct const struct utf8data for a unicode version. 39 * Returns NULL if the version requested is too new. 40 * 41 * Two normalization forms are supported: nfdi and nfdicf. 42 * 43 * nfdi: 44 * - Apply unicode normalization form NFD. 45 * - Remove any Default_Ignorable_Code_Point. 46 * 47 * nfdicf: 48 * - Apply unicode normalization form NFD. 49 * - Remove any Default_Ignorable_Code_Point. 50 * - Apply a full casefold (C + F). 51 */ 52 extern const struct utf8data *utf8nfdi(unsigned int maxage); 53 extern const struct utf8data *utf8nfdicf(unsigned int maxage); 54 55 /* 56 * Determine the maximum age of any unicode character in the string. 57 * Returns 0 if only unassigned code points are present. 58 * Returns -1 if the input is not valid UTF-8. 59 */ 60 extern int utf8agemax(const struct utf8data *data, const char *s); 61 extern int utf8nagemax(const struct utf8data *data, const char *s, size_t len); 62 63 /* 64 * Determine the minimum age of any unicode character in the string. 65 * Returns 0 if any unassigned code points are present. 66 * Returns -1 if the input is not valid UTF-8. 67 */ 68 extern int utf8agemin(const struct utf8data *data, const char *s); 69 extern int utf8nagemin(const struct utf8data *data, const char *s, size_t len); 70 71 /* 72 * Determine the length of the normalized from of the string, 73 * excluding any terminating NULL byte. 74 * Returns 0 if only ignorable code points are present. 75 * Returns -1 if the input is not valid UTF-8. 76 */ 77 extern ssize_t utf8len(const struct utf8data *data, const char *s); 78 extern ssize_t utf8nlen(const struct utf8data *data, const char *s, size_t len); 79 80 /* Needed in struct utf8cursor below. */ 81 #define UTF8HANGULLEAF (12) 82 83 /* 84 * Cursor structure used by the normalizer. 85 */ 86 struct utf8cursor { 87 const struct utf8data *data; 88 const char *s; 89 const char *p; 90 const char *ss; 91 const char *sp; 92 unsigned int len; 93 unsigned int slen; 94 short int ccc; 95 short int nccc; 96 unsigned char hangul[UTF8HANGULLEAF]; 97 }; 98 99 /* 100 * Initialize a utf8cursor to normalize a string. 101 * Returns 0 on success. 102 * Returns -1 on failure. 103 */ 104 extern int utf8cursor(struct utf8cursor *u8c, const struct utf8data *data, 105 const char *s); 106 extern int utf8ncursor(struct utf8cursor *u8c, const struct utf8data *data, 107 const char *s, size_t len); 108 109 /* 110 * Get the next byte in the normalization. 111 * Returns a value > 0 && < 256 on success. 112 * Returns 0 when the end of the normalization is reached. 113 * Returns -1 if the string being normalized is not valid UTF-8. 114 */ 115 extern int utf8byte(struct utf8cursor *u8c); 116 117 #endif /* UTF8NORM_H */ 118