1*9d53690fSGabriel Krisman Bertazi /* SPDX-License-Identifier: GPL-2.0 */ 2*9d53690fSGabriel Krisman Bertazi #include <linux/module.h> 3*9d53690fSGabriel Krisman Bertazi #include <linux/kernel.h> 4*9d53690fSGabriel Krisman Bertazi #include <linux/string.h> 5*9d53690fSGabriel Krisman Bertazi #include <linux/slab.h> 6*9d53690fSGabriel Krisman Bertazi #include <linux/parser.h> 7*9d53690fSGabriel Krisman Bertazi #include <linux/errno.h> 8*9d53690fSGabriel Krisman Bertazi #include <linux/unicode.h> 9*9d53690fSGabriel Krisman Bertazi 10*9d53690fSGabriel Krisman Bertazi #include "utf8n.h" 11*9d53690fSGabriel Krisman Bertazi 12*9d53690fSGabriel Krisman Bertazi int utf8_validate(const struct unicode_map *um, const struct qstr *str) 13*9d53690fSGabriel Krisman Bertazi { 14*9d53690fSGabriel Krisman Bertazi const struct utf8data *data = utf8nfdi(um->version); 15*9d53690fSGabriel Krisman Bertazi 16*9d53690fSGabriel Krisman Bertazi if (utf8nlen(data, str->name, str->len) < 0) 17*9d53690fSGabriel Krisman Bertazi return -1; 18*9d53690fSGabriel Krisman Bertazi return 0; 19*9d53690fSGabriel Krisman Bertazi } 20*9d53690fSGabriel Krisman Bertazi EXPORT_SYMBOL(utf8_validate); 21*9d53690fSGabriel Krisman Bertazi 22*9d53690fSGabriel Krisman Bertazi int utf8_strncmp(const struct unicode_map *um, 23*9d53690fSGabriel Krisman Bertazi const struct qstr *s1, const struct qstr *s2) 24*9d53690fSGabriel Krisman Bertazi { 25*9d53690fSGabriel Krisman Bertazi const struct utf8data *data = utf8nfdi(um->version); 26*9d53690fSGabriel Krisman Bertazi struct utf8cursor cur1, cur2; 27*9d53690fSGabriel Krisman Bertazi int c1, c2; 28*9d53690fSGabriel Krisman Bertazi 29*9d53690fSGabriel Krisman Bertazi if (utf8ncursor(&cur1, data, s1->name, s1->len) < 0) 30*9d53690fSGabriel Krisman Bertazi return -EINVAL; 31*9d53690fSGabriel Krisman Bertazi 32*9d53690fSGabriel Krisman Bertazi if (utf8ncursor(&cur2, data, s2->name, s2->len) < 0) 33*9d53690fSGabriel Krisman Bertazi return -EINVAL; 34*9d53690fSGabriel Krisman Bertazi 35*9d53690fSGabriel Krisman Bertazi do { 36*9d53690fSGabriel Krisman Bertazi c1 = utf8byte(&cur1); 37*9d53690fSGabriel Krisman Bertazi c2 = utf8byte(&cur2); 38*9d53690fSGabriel Krisman Bertazi 39*9d53690fSGabriel Krisman Bertazi if (c1 < 0 || c2 < 0) 40*9d53690fSGabriel Krisman Bertazi return -EINVAL; 41*9d53690fSGabriel Krisman Bertazi if (c1 != c2) 42*9d53690fSGabriel Krisman Bertazi return 1; 43*9d53690fSGabriel Krisman Bertazi } while (c1); 44*9d53690fSGabriel Krisman Bertazi 45*9d53690fSGabriel Krisman Bertazi return 0; 46*9d53690fSGabriel Krisman Bertazi } 47*9d53690fSGabriel Krisman Bertazi EXPORT_SYMBOL(utf8_strncmp); 48*9d53690fSGabriel Krisman Bertazi 49*9d53690fSGabriel Krisman Bertazi int utf8_strncasecmp(const struct unicode_map *um, 50*9d53690fSGabriel Krisman Bertazi const struct qstr *s1, const struct qstr *s2) 51*9d53690fSGabriel Krisman Bertazi { 52*9d53690fSGabriel Krisman Bertazi const struct utf8data *data = utf8nfdicf(um->version); 53*9d53690fSGabriel Krisman Bertazi struct utf8cursor cur1, cur2; 54*9d53690fSGabriel Krisman Bertazi int c1, c2; 55*9d53690fSGabriel Krisman Bertazi 56*9d53690fSGabriel Krisman Bertazi if (utf8ncursor(&cur1, data, s1->name, s1->len) < 0) 57*9d53690fSGabriel Krisman Bertazi return -EINVAL; 58*9d53690fSGabriel Krisman Bertazi 59*9d53690fSGabriel Krisman Bertazi if (utf8ncursor(&cur2, data, s2->name, s2->len) < 0) 60*9d53690fSGabriel Krisman Bertazi return -EINVAL; 61*9d53690fSGabriel Krisman Bertazi 62*9d53690fSGabriel Krisman Bertazi do { 63*9d53690fSGabriel Krisman Bertazi c1 = utf8byte(&cur1); 64*9d53690fSGabriel Krisman Bertazi c2 = utf8byte(&cur2); 65*9d53690fSGabriel Krisman Bertazi 66*9d53690fSGabriel Krisman Bertazi if (c1 < 0 || c2 < 0) 67*9d53690fSGabriel Krisman Bertazi return -EINVAL; 68*9d53690fSGabriel Krisman Bertazi if (c1 != c2) 69*9d53690fSGabriel Krisman Bertazi return 1; 70*9d53690fSGabriel Krisman Bertazi } while (c1); 71*9d53690fSGabriel Krisman Bertazi 72*9d53690fSGabriel Krisman Bertazi return 0; 73*9d53690fSGabriel Krisman Bertazi } 74*9d53690fSGabriel Krisman Bertazi EXPORT_SYMBOL(utf8_strncasecmp); 75*9d53690fSGabriel Krisman Bertazi 76*9d53690fSGabriel Krisman Bertazi int utf8_casefold(const struct unicode_map *um, const struct qstr *str, 77*9d53690fSGabriel Krisman Bertazi unsigned char *dest, size_t dlen) 78*9d53690fSGabriel Krisman Bertazi { 79*9d53690fSGabriel Krisman Bertazi const struct utf8data *data = utf8nfdicf(um->version); 80*9d53690fSGabriel Krisman Bertazi struct utf8cursor cur; 81*9d53690fSGabriel Krisman Bertazi size_t nlen = 0; 82*9d53690fSGabriel Krisman Bertazi 83*9d53690fSGabriel Krisman Bertazi if (utf8ncursor(&cur, data, str->name, str->len) < 0) 84*9d53690fSGabriel Krisman Bertazi return -EINVAL; 85*9d53690fSGabriel Krisman Bertazi 86*9d53690fSGabriel Krisman Bertazi for (nlen = 0; nlen < dlen; nlen++) { 87*9d53690fSGabriel Krisman Bertazi int c = utf8byte(&cur); 88*9d53690fSGabriel Krisman Bertazi 89*9d53690fSGabriel Krisman Bertazi dest[nlen] = c; 90*9d53690fSGabriel Krisman Bertazi if (!c) 91*9d53690fSGabriel Krisman Bertazi return nlen; 92*9d53690fSGabriel Krisman Bertazi if (c == -1) 93*9d53690fSGabriel Krisman Bertazi break; 94*9d53690fSGabriel Krisman Bertazi } 95*9d53690fSGabriel Krisman Bertazi return -EINVAL; 96*9d53690fSGabriel Krisman Bertazi } 97*9d53690fSGabriel Krisman Bertazi 98*9d53690fSGabriel Krisman Bertazi EXPORT_SYMBOL(utf8_casefold); 99*9d53690fSGabriel Krisman Bertazi 100*9d53690fSGabriel Krisman Bertazi int utf8_normalize(const struct unicode_map *um, const struct qstr *str, 101*9d53690fSGabriel Krisman Bertazi unsigned char *dest, size_t dlen) 102*9d53690fSGabriel Krisman Bertazi { 103*9d53690fSGabriel Krisman Bertazi const struct utf8data *data = utf8nfdi(um->version); 104*9d53690fSGabriel Krisman Bertazi struct utf8cursor cur; 105*9d53690fSGabriel Krisman Bertazi ssize_t nlen = 0; 106*9d53690fSGabriel Krisman Bertazi 107*9d53690fSGabriel Krisman Bertazi if (utf8ncursor(&cur, data, str->name, str->len) < 0) 108*9d53690fSGabriel Krisman Bertazi return -EINVAL; 109*9d53690fSGabriel Krisman Bertazi 110*9d53690fSGabriel Krisman Bertazi for (nlen = 0; nlen < dlen; nlen++) { 111*9d53690fSGabriel Krisman Bertazi int c = utf8byte(&cur); 112*9d53690fSGabriel Krisman Bertazi 113*9d53690fSGabriel Krisman Bertazi dest[nlen] = c; 114*9d53690fSGabriel Krisman Bertazi if (!c) 115*9d53690fSGabriel Krisman Bertazi return nlen; 116*9d53690fSGabriel Krisman Bertazi if (c == -1) 117*9d53690fSGabriel Krisman Bertazi break; 118*9d53690fSGabriel Krisman Bertazi } 119*9d53690fSGabriel Krisman Bertazi return -EINVAL; 120*9d53690fSGabriel Krisman Bertazi } 121*9d53690fSGabriel Krisman Bertazi 122*9d53690fSGabriel Krisman Bertazi EXPORT_SYMBOL(utf8_normalize); 123*9d53690fSGabriel Krisman Bertazi 124*9d53690fSGabriel Krisman Bertazi static int utf8_parse_version(const char *version, unsigned int *maj, 125*9d53690fSGabriel Krisman Bertazi unsigned int *min, unsigned int *rev) 126*9d53690fSGabriel Krisman Bertazi { 127*9d53690fSGabriel Krisman Bertazi substring_t args[3]; 128*9d53690fSGabriel Krisman Bertazi char version_string[12]; 129*9d53690fSGabriel Krisman Bertazi const struct match_token token[] = { 130*9d53690fSGabriel Krisman Bertazi {1, "%d.%d.%d"}, 131*9d53690fSGabriel Krisman Bertazi {0, NULL} 132*9d53690fSGabriel Krisman Bertazi }; 133*9d53690fSGabriel Krisman Bertazi 134*9d53690fSGabriel Krisman Bertazi strncpy(version_string, version, sizeof(version_string)); 135*9d53690fSGabriel Krisman Bertazi 136*9d53690fSGabriel Krisman Bertazi if (match_token(version_string, token, args) != 1) 137*9d53690fSGabriel Krisman Bertazi return -EINVAL; 138*9d53690fSGabriel Krisman Bertazi 139*9d53690fSGabriel Krisman Bertazi if (match_int(&args[0], maj) || match_int(&args[1], min) || 140*9d53690fSGabriel Krisman Bertazi match_int(&args[2], rev)) 141*9d53690fSGabriel Krisman Bertazi return -EINVAL; 142*9d53690fSGabriel Krisman Bertazi 143*9d53690fSGabriel Krisman Bertazi return 0; 144*9d53690fSGabriel Krisman Bertazi } 145*9d53690fSGabriel Krisman Bertazi 146*9d53690fSGabriel Krisman Bertazi struct unicode_map *utf8_load(const char *version) 147*9d53690fSGabriel Krisman Bertazi { 148*9d53690fSGabriel Krisman Bertazi struct unicode_map *um = NULL; 149*9d53690fSGabriel Krisman Bertazi int unicode_version; 150*9d53690fSGabriel Krisman Bertazi 151*9d53690fSGabriel Krisman Bertazi if (version) { 152*9d53690fSGabriel Krisman Bertazi unsigned int maj, min, rev; 153*9d53690fSGabriel Krisman Bertazi 154*9d53690fSGabriel Krisman Bertazi if (utf8_parse_version(version, &maj, &min, &rev) < 0) 155*9d53690fSGabriel Krisman Bertazi return ERR_PTR(-EINVAL); 156*9d53690fSGabriel Krisman Bertazi 157*9d53690fSGabriel Krisman Bertazi if (!utf8version_is_supported(maj, min, rev)) 158*9d53690fSGabriel Krisman Bertazi return ERR_PTR(-EINVAL); 159*9d53690fSGabriel Krisman Bertazi 160*9d53690fSGabriel Krisman Bertazi unicode_version = UNICODE_AGE(maj, min, rev); 161*9d53690fSGabriel Krisman Bertazi } else { 162*9d53690fSGabriel Krisman Bertazi unicode_version = utf8version_latest(); 163*9d53690fSGabriel Krisman Bertazi printk(KERN_WARNING"UTF-8 version not specified. " 164*9d53690fSGabriel Krisman Bertazi "Assuming latest supported version (%d.%d.%d).", 165*9d53690fSGabriel Krisman Bertazi (unicode_version >> 16) & 0xff, 166*9d53690fSGabriel Krisman Bertazi (unicode_version >> 8) & 0xff, 167*9d53690fSGabriel Krisman Bertazi (unicode_version & 0xff)); 168*9d53690fSGabriel Krisman Bertazi } 169*9d53690fSGabriel Krisman Bertazi 170*9d53690fSGabriel Krisman Bertazi um = kzalloc(sizeof(struct unicode_map), GFP_KERNEL); 171*9d53690fSGabriel Krisman Bertazi if (!um) 172*9d53690fSGabriel Krisman Bertazi return ERR_PTR(-ENOMEM); 173*9d53690fSGabriel Krisman Bertazi 174*9d53690fSGabriel Krisman Bertazi um->charset = "UTF-8"; 175*9d53690fSGabriel Krisman Bertazi um->version = unicode_version; 176*9d53690fSGabriel Krisman Bertazi 177*9d53690fSGabriel Krisman Bertazi return um; 178*9d53690fSGabriel Krisman Bertazi } 179*9d53690fSGabriel Krisman Bertazi EXPORT_SYMBOL(utf8_load); 180*9d53690fSGabriel Krisman Bertazi 181*9d53690fSGabriel Krisman Bertazi void utf8_unload(struct unicode_map *um) 182*9d53690fSGabriel Krisman Bertazi { 183*9d53690fSGabriel Krisman Bertazi kfree(um); 184*9d53690fSGabriel Krisman Bertazi } 185*9d53690fSGabriel Krisman Bertazi EXPORT_SYMBOL(utf8_unload); 186*9d53690fSGabriel Krisman Bertazi 187*9d53690fSGabriel Krisman Bertazi MODULE_LICENSE("GPL v2"); 188