19d53690fSGabriel Krisman Bertazi /* SPDX-License-Identifier: GPL-2.0 */ 29d53690fSGabriel Krisman Bertazi #include <linux/module.h> 39d53690fSGabriel Krisman Bertazi #include <linux/kernel.h> 49d53690fSGabriel Krisman Bertazi #include <linux/string.h> 59d53690fSGabriel Krisman Bertazi #include <linux/slab.h> 69d53690fSGabriel Krisman Bertazi #include <linux/parser.h> 79d53690fSGabriel Krisman Bertazi #include <linux/errno.h> 89d53690fSGabriel Krisman Bertazi #include <linux/unicode.h> 9*3d7bfea8SDaniel Rosenberg #include <linux/stringhash.h> 109d53690fSGabriel Krisman Bertazi 119d53690fSGabriel Krisman Bertazi #include "utf8n.h" 129d53690fSGabriel Krisman Bertazi 139d53690fSGabriel Krisman Bertazi int utf8_validate(const struct unicode_map *um, const struct qstr *str) 149d53690fSGabriel Krisman Bertazi { 159d53690fSGabriel Krisman Bertazi const struct utf8data *data = utf8nfdi(um->version); 169d53690fSGabriel Krisman Bertazi 179d53690fSGabriel Krisman Bertazi if (utf8nlen(data, str->name, str->len) < 0) 189d53690fSGabriel Krisman Bertazi return -1; 199d53690fSGabriel Krisman Bertazi return 0; 209d53690fSGabriel Krisman Bertazi } 219d53690fSGabriel Krisman Bertazi EXPORT_SYMBOL(utf8_validate); 229d53690fSGabriel Krisman Bertazi 239d53690fSGabriel Krisman Bertazi int utf8_strncmp(const struct unicode_map *um, 249d53690fSGabriel Krisman Bertazi const struct qstr *s1, const struct qstr *s2) 259d53690fSGabriel Krisman Bertazi { 269d53690fSGabriel Krisman Bertazi const struct utf8data *data = utf8nfdi(um->version); 279d53690fSGabriel Krisman Bertazi struct utf8cursor cur1, cur2; 289d53690fSGabriel Krisman Bertazi int c1, c2; 299d53690fSGabriel Krisman Bertazi 309d53690fSGabriel Krisman Bertazi if (utf8ncursor(&cur1, data, s1->name, s1->len) < 0) 319d53690fSGabriel Krisman Bertazi return -EINVAL; 329d53690fSGabriel Krisman Bertazi 339d53690fSGabriel Krisman Bertazi if (utf8ncursor(&cur2, data, s2->name, s2->len) < 0) 349d53690fSGabriel Krisman Bertazi return -EINVAL; 359d53690fSGabriel Krisman Bertazi 369d53690fSGabriel Krisman Bertazi do { 379d53690fSGabriel Krisman Bertazi c1 = utf8byte(&cur1); 389d53690fSGabriel Krisman Bertazi c2 = utf8byte(&cur2); 399d53690fSGabriel Krisman Bertazi 409d53690fSGabriel Krisman Bertazi if (c1 < 0 || c2 < 0) 419d53690fSGabriel Krisman Bertazi return -EINVAL; 429d53690fSGabriel Krisman Bertazi if (c1 != c2) 439d53690fSGabriel Krisman Bertazi return 1; 449d53690fSGabriel Krisman Bertazi } while (c1); 459d53690fSGabriel Krisman Bertazi 469d53690fSGabriel Krisman Bertazi return 0; 479d53690fSGabriel Krisman Bertazi } 489d53690fSGabriel Krisman Bertazi EXPORT_SYMBOL(utf8_strncmp); 499d53690fSGabriel Krisman Bertazi 509d53690fSGabriel Krisman Bertazi int utf8_strncasecmp(const struct unicode_map *um, 519d53690fSGabriel Krisman Bertazi const struct qstr *s1, const struct qstr *s2) 529d53690fSGabriel Krisman Bertazi { 539d53690fSGabriel Krisman Bertazi const struct utf8data *data = utf8nfdicf(um->version); 549d53690fSGabriel Krisman Bertazi struct utf8cursor cur1, cur2; 559d53690fSGabriel Krisman Bertazi int c1, c2; 569d53690fSGabriel Krisman Bertazi 579d53690fSGabriel Krisman Bertazi if (utf8ncursor(&cur1, data, s1->name, s1->len) < 0) 589d53690fSGabriel Krisman Bertazi return -EINVAL; 599d53690fSGabriel Krisman Bertazi 609d53690fSGabriel Krisman Bertazi if (utf8ncursor(&cur2, data, s2->name, s2->len) < 0) 619d53690fSGabriel Krisman Bertazi return -EINVAL; 629d53690fSGabriel Krisman Bertazi 639d53690fSGabriel Krisman Bertazi do { 649d53690fSGabriel Krisman Bertazi c1 = utf8byte(&cur1); 659d53690fSGabriel Krisman Bertazi c2 = utf8byte(&cur2); 669d53690fSGabriel Krisman Bertazi 679d53690fSGabriel Krisman Bertazi if (c1 < 0 || c2 < 0) 689d53690fSGabriel Krisman Bertazi return -EINVAL; 699d53690fSGabriel Krisman Bertazi if (c1 != c2) 709d53690fSGabriel Krisman Bertazi return 1; 719d53690fSGabriel Krisman Bertazi } while (c1); 729d53690fSGabriel Krisman Bertazi 739d53690fSGabriel Krisman Bertazi return 0; 749d53690fSGabriel Krisman Bertazi } 759d53690fSGabriel Krisman Bertazi EXPORT_SYMBOL(utf8_strncasecmp); 769d53690fSGabriel Krisman Bertazi 773ae72562SGabriel Krisman Bertazi /* String cf is expected to be a valid UTF-8 casefolded 783ae72562SGabriel Krisman Bertazi * string. 793ae72562SGabriel Krisman Bertazi */ 803ae72562SGabriel Krisman Bertazi int utf8_strncasecmp_folded(const struct unicode_map *um, 813ae72562SGabriel Krisman Bertazi const struct qstr *cf, 823ae72562SGabriel Krisman Bertazi const struct qstr *s1) 833ae72562SGabriel Krisman Bertazi { 843ae72562SGabriel Krisman Bertazi const struct utf8data *data = utf8nfdicf(um->version); 853ae72562SGabriel Krisman Bertazi struct utf8cursor cur1; 863ae72562SGabriel Krisman Bertazi int c1, c2; 873ae72562SGabriel Krisman Bertazi int i = 0; 883ae72562SGabriel Krisman Bertazi 893ae72562SGabriel Krisman Bertazi if (utf8ncursor(&cur1, data, s1->name, s1->len) < 0) 903ae72562SGabriel Krisman Bertazi return -EINVAL; 913ae72562SGabriel Krisman Bertazi 923ae72562SGabriel Krisman Bertazi do { 933ae72562SGabriel Krisman Bertazi c1 = utf8byte(&cur1); 943ae72562SGabriel Krisman Bertazi c2 = cf->name[i++]; 953ae72562SGabriel Krisman Bertazi if (c1 < 0) 963ae72562SGabriel Krisman Bertazi return -EINVAL; 973ae72562SGabriel Krisman Bertazi if (c1 != c2) 983ae72562SGabriel Krisman Bertazi return 1; 993ae72562SGabriel Krisman Bertazi } while (c1); 1003ae72562SGabriel Krisman Bertazi 1013ae72562SGabriel Krisman Bertazi return 0; 1023ae72562SGabriel Krisman Bertazi } 1033ae72562SGabriel Krisman Bertazi EXPORT_SYMBOL(utf8_strncasecmp_folded); 1043ae72562SGabriel Krisman Bertazi 1059d53690fSGabriel Krisman Bertazi int utf8_casefold(const struct unicode_map *um, const struct qstr *str, 1069d53690fSGabriel Krisman Bertazi unsigned char *dest, size_t dlen) 1079d53690fSGabriel Krisman Bertazi { 1089d53690fSGabriel Krisman Bertazi const struct utf8data *data = utf8nfdicf(um->version); 1099d53690fSGabriel Krisman Bertazi struct utf8cursor cur; 1109d53690fSGabriel Krisman Bertazi size_t nlen = 0; 1119d53690fSGabriel Krisman Bertazi 1129d53690fSGabriel Krisman Bertazi if (utf8ncursor(&cur, data, str->name, str->len) < 0) 1139d53690fSGabriel Krisman Bertazi return -EINVAL; 1149d53690fSGabriel Krisman Bertazi 1159d53690fSGabriel Krisman Bertazi for (nlen = 0; nlen < dlen; nlen++) { 1169d53690fSGabriel Krisman Bertazi int c = utf8byte(&cur); 1179d53690fSGabriel Krisman Bertazi 1189d53690fSGabriel Krisman Bertazi dest[nlen] = c; 1199d53690fSGabriel Krisman Bertazi if (!c) 1209d53690fSGabriel Krisman Bertazi return nlen; 1219d53690fSGabriel Krisman Bertazi if (c == -1) 1229d53690fSGabriel Krisman Bertazi break; 1239d53690fSGabriel Krisman Bertazi } 1249d53690fSGabriel Krisman Bertazi return -EINVAL; 1259d53690fSGabriel Krisman Bertazi } 1269d53690fSGabriel Krisman Bertazi EXPORT_SYMBOL(utf8_casefold); 1279d53690fSGabriel Krisman Bertazi 128*3d7bfea8SDaniel Rosenberg int utf8_casefold_hash(const struct unicode_map *um, const void *salt, 129*3d7bfea8SDaniel Rosenberg struct qstr *str) 130*3d7bfea8SDaniel Rosenberg { 131*3d7bfea8SDaniel Rosenberg const struct utf8data *data = utf8nfdicf(um->version); 132*3d7bfea8SDaniel Rosenberg struct utf8cursor cur; 133*3d7bfea8SDaniel Rosenberg int c; 134*3d7bfea8SDaniel Rosenberg unsigned long hash = init_name_hash(salt); 135*3d7bfea8SDaniel Rosenberg 136*3d7bfea8SDaniel Rosenberg if (utf8ncursor(&cur, data, str->name, str->len) < 0) 137*3d7bfea8SDaniel Rosenberg return -EINVAL; 138*3d7bfea8SDaniel Rosenberg 139*3d7bfea8SDaniel Rosenberg while ((c = utf8byte(&cur))) { 140*3d7bfea8SDaniel Rosenberg if (c < 0) 141*3d7bfea8SDaniel Rosenberg return -EINVAL; 142*3d7bfea8SDaniel Rosenberg hash = partial_name_hash((unsigned char)c, hash); 143*3d7bfea8SDaniel Rosenberg } 144*3d7bfea8SDaniel Rosenberg str->hash = end_name_hash(hash); 145*3d7bfea8SDaniel Rosenberg return 0; 146*3d7bfea8SDaniel Rosenberg } 147*3d7bfea8SDaniel Rosenberg EXPORT_SYMBOL(utf8_casefold_hash); 148*3d7bfea8SDaniel Rosenberg 1499d53690fSGabriel Krisman Bertazi int utf8_normalize(const struct unicode_map *um, const struct qstr *str, 1509d53690fSGabriel Krisman Bertazi unsigned char *dest, size_t dlen) 1519d53690fSGabriel Krisman Bertazi { 1529d53690fSGabriel Krisman Bertazi const struct utf8data *data = utf8nfdi(um->version); 1539d53690fSGabriel Krisman Bertazi struct utf8cursor cur; 1549d53690fSGabriel Krisman Bertazi ssize_t nlen = 0; 1559d53690fSGabriel Krisman Bertazi 1569d53690fSGabriel Krisman Bertazi if (utf8ncursor(&cur, data, str->name, str->len) < 0) 1579d53690fSGabriel Krisman Bertazi return -EINVAL; 1589d53690fSGabriel Krisman Bertazi 1599d53690fSGabriel Krisman Bertazi for (nlen = 0; nlen < dlen; nlen++) { 1609d53690fSGabriel Krisman Bertazi int c = utf8byte(&cur); 1619d53690fSGabriel Krisman Bertazi 1629d53690fSGabriel Krisman Bertazi dest[nlen] = c; 1639d53690fSGabriel Krisman Bertazi if (!c) 1649d53690fSGabriel Krisman Bertazi return nlen; 1659d53690fSGabriel Krisman Bertazi if (c == -1) 1669d53690fSGabriel Krisman Bertazi break; 1679d53690fSGabriel Krisman Bertazi } 1689d53690fSGabriel Krisman Bertazi return -EINVAL; 1699d53690fSGabriel Krisman Bertazi } 1709d53690fSGabriel Krisman Bertazi 1719d53690fSGabriel Krisman Bertazi EXPORT_SYMBOL(utf8_normalize); 1729d53690fSGabriel Krisman Bertazi 1739d53690fSGabriel Krisman Bertazi static int utf8_parse_version(const char *version, unsigned int *maj, 1749d53690fSGabriel Krisman Bertazi unsigned int *min, unsigned int *rev) 1759d53690fSGabriel Krisman Bertazi { 1769d53690fSGabriel Krisman Bertazi substring_t args[3]; 1779d53690fSGabriel Krisman Bertazi char version_string[12]; 178aa28b98dSColin Ian King static const struct match_token token[] = { 1799d53690fSGabriel Krisman Bertazi {1, "%d.%d.%d"}, 1809d53690fSGabriel Krisman Bertazi {0, NULL} 1819d53690fSGabriel Krisman Bertazi }; 1829d53690fSGabriel Krisman Bertazi 1839d53690fSGabriel Krisman Bertazi strncpy(version_string, version, sizeof(version_string)); 1849d53690fSGabriel Krisman Bertazi 1859d53690fSGabriel Krisman Bertazi if (match_token(version_string, token, args) != 1) 1869d53690fSGabriel Krisman Bertazi return -EINVAL; 1879d53690fSGabriel Krisman Bertazi 1889d53690fSGabriel Krisman Bertazi if (match_int(&args[0], maj) || match_int(&args[1], min) || 1899d53690fSGabriel Krisman Bertazi match_int(&args[2], rev)) 1909d53690fSGabriel Krisman Bertazi return -EINVAL; 1919d53690fSGabriel Krisman Bertazi 1929d53690fSGabriel Krisman Bertazi return 0; 1939d53690fSGabriel Krisman Bertazi } 1949d53690fSGabriel Krisman Bertazi 1959d53690fSGabriel Krisman Bertazi struct unicode_map *utf8_load(const char *version) 1969d53690fSGabriel Krisman Bertazi { 1979d53690fSGabriel Krisman Bertazi struct unicode_map *um = NULL; 1989d53690fSGabriel Krisman Bertazi int unicode_version; 1999d53690fSGabriel Krisman Bertazi 2009d53690fSGabriel Krisman Bertazi if (version) { 2019d53690fSGabriel Krisman Bertazi unsigned int maj, min, rev; 2029d53690fSGabriel Krisman Bertazi 2039d53690fSGabriel Krisman Bertazi if (utf8_parse_version(version, &maj, &min, &rev) < 0) 2049d53690fSGabriel Krisman Bertazi return ERR_PTR(-EINVAL); 2059d53690fSGabriel Krisman Bertazi 2069d53690fSGabriel Krisman Bertazi if (!utf8version_is_supported(maj, min, rev)) 2079d53690fSGabriel Krisman Bertazi return ERR_PTR(-EINVAL); 2089d53690fSGabriel Krisman Bertazi 2099d53690fSGabriel Krisman Bertazi unicode_version = UNICODE_AGE(maj, min, rev); 2109d53690fSGabriel Krisman Bertazi } else { 2119d53690fSGabriel Krisman Bertazi unicode_version = utf8version_latest(); 2129d53690fSGabriel Krisman Bertazi printk(KERN_WARNING"UTF-8 version not specified. " 2139d53690fSGabriel Krisman Bertazi "Assuming latest supported version (%d.%d.%d).", 2149d53690fSGabriel Krisman Bertazi (unicode_version >> 16) & 0xff, 2159d53690fSGabriel Krisman Bertazi (unicode_version >> 8) & 0xff, 2169d53690fSGabriel Krisman Bertazi (unicode_version & 0xff)); 2179d53690fSGabriel Krisman Bertazi } 2189d53690fSGabriel Krisman Bertazi 2199d53690fSGabriel Krisman Bertazi um = kzalloc(sizeof(struct unicode_map), GFP_KERNEL); 2209d53690fSGabriel Krisman Bertazi if (!um) 2219d53690fSGabriel Krisman Bertazi return ERR_PTR(-ENOMEM); 2229d53690fSGabriel Krisman Bertazi 2239d53690fSGabriel Krisman Bertazi um->charset = "UTF-8"; 2249d53690fSGabriel Krisman Bertazi um->version = unicode_version; 2259d53690fSGabriel Krisman Bertazi 2269d53690fSGabriel Krisman Bertazi return um; 2279d53690fSGabriel Krisman Bertazi } 2289d53690fSGabriel Krisman Bertazi EXPORT_SYMBOL(utf8_load); 2299d53690fSGabriel Krisman Bertazi 2309d53690fSGabriel Krisman Bertazi void utf8_unload(struct unicode_map *um) 2319d53690fSGabriel Krisman Bertazi { 2329d53690fSGabriel Krisman Bertazi kfree(um); 2339d53690fSGabriel Krisman Bertazi } 2349d53690fSGabriel Krisman Bertazi EXPORT_SYMBOL(utf8_unload); 2359d53690fSGabriel Krisman Bertazi 2369d53690fSGabriel Krisman Bertazi MODULE_LICENSE("GPL v2"); 237