19d53690fSGabriel Krisman Bertazi /* SPDX-License-Identifier: GPL-2.0 */ 29d53690fSGabriel Krisman Bertazi #include <linux/module.h> 39d53690fSGabriel Krisman Bertazi #include <linux/kernel.h> 49d53690fSGabriel Krisman Bertazi #include <linux/string.h> 59d53690fSGabriel Krisman Bertazi #include <linux/slab.h> 69d53690fSGabriel Krisman Bertazi #include <linux/parser.h> 79d53690fSGabriel Krisman Bertazi #include <linux/errno.h> 89d53690fSGabriel Krisman Bertazi #include <linux/unicode.h> 93d7bfea8SDaniel Rosenberg #include <linux/stringhash.h> 109d53690fSGabriel Krisman Bertazi 119d53690fSGabriel Krisman Bertazi #include "utf8n.h" 129d53690fSGabriel Krisman Bertazi 139d53690fSGabriel Krisman Bertazi int utf8_validate(const struct unicode_map *um, const struct qstr *str) 149d53690fSGabriel Krisman Bertazi { 159d53690fSGabriel Krisman Bertazi const struct utf8data *data = utf8nfdi(um->version); 169d53690fSGabriel Krisman Bertazi 179d53690fSGabriel Krisman Bertazi if (utf8nlen(data, str->name, str->len) < 0) 189d53690fSGabriel Krisman Bertazi return -1; 199d53690fSGabriel Krisman Bertazi return 0; 209d53690fSGabriel Krisman Bertazi } 219d53690fSGabriel Krisman Bertazi EXPORT_SYMBOL(utf8_validate); 229d53690fSGabriel Krisman Bertazi 239d53690fSGabriel Krisman Bertazi int utf8_strncmp(const struct unicode_map *um, 249d53690fSGabriel Krisman Bertazi const struct qstr *s1, const struct qstr *s2) 259d53690fSGabriel Krisman Bertazi { 269d53690fSGabriel Krisman Bertazi const struct utf8data *data = utf8nfdi(um->version); 279d53690fSGabriel Krisman Bertazi struct utf8cursor cur1, cur2; 289d53690fSGabriel Krisman Bertazi int c1, c2; 299d53690fSGabriel Krisman Bertazi 309d53690fSGabriel Krisman Bertazi if (utf8ncursor(&cur1, data, s1->name, s1->len) < 0) 319d53690fSGabriel Krisman Bertazi return -EINVAL; 329d53690fSGabriel Krisman Bertazi 339d53690fSGabriel Krisman Bertazi if (utf8ncursor(&cur2, data, s2->name, s2->len) < 0) 349d53690fSGabriel Krisman Bertazi return -EINVAL; 359d53690fSGabriel Krisman Bertazi 369d53690fSGabriel Krisman Bertazi do { 379d53690fSGabriel Krisman Bertazi c1 = utf8byte(&cur1); 389d53690fSGabriel Krisman Bertazi c2 = utf8byte(&cur2); 399d53690fSGabriel Krisman Bertazi 409d53690fSGabriel Krisman Bertazi if (c1 < 0 || c2 < 0) 419d53690fSGabriel Krisman Bertazi return -EINVAL; 429d53690fSGabriel Krisman Bertazi if (c1 != c2) 439d53690fSGabriel Krisman Bertazi return 1; 449d53690fSGabriel Krisman Bertazi } while (c1); 459d53690fSGabriel Krisman Bertazi 469d53690fSGabriel Krisman Bertazi return 0; 479d53690fSGabriel Krisman Bertazi } 489d53690fSGabriel Krisman Bertazi EXPORT_SYMBOL(utf8_strncmp); 499d53690fSGabriel Krisman Bertazi 509d53690fSGabriel Krisman Bertazi int utf8_strncasecmp(const struct unicode_map *um, 519d53690fSGabriel Krisman Bertazi const struct qstr *s1, const struct qstr *s2) 529d53690fSGabriel Krisman Bertazi { 539d53690fSGabriel Krisman Bertazi const struct utf8data *data = utf8nfdicf(um->version); 549d53690fSGabriel Krisman Bertazi struct utf8cursor cur1, cur2; 559d53690fSGabriel Krisman Bertazi int c1, c2; 569d53690fSGabriel Krisman Bertazi 579d53690fSGabriel Krisman Bertazi if (utf8ncursor(&cur1, data, s1->name, s1->len) < 0) 589d53690fSGabriel Krisman Bertazi return -EINVAL; 599d53690fSGabriel Krisman Bertazi 609d53690fSGabriel Krisman Bertazi if (utf8ncursor(&cur2, data, s2->name, s2->len) < 0) 619d53690fSGabriel Krisman Bertazi return -EINVAL; 629d53690fSGabriel Krisman Bertazi 639d53690fSGabriel Krisman Bertazi do { 649d53690fSGabriel Krisman Bertazi c1 = utf8byte(&cur1); 659d53690fSGabriel Krisman Bertazi c2 = utf8byte(&cur2); 669d53690fSGabriel Krisman Bertazi 679d53690fSGabriel Krisman Bertazi if (c1 < 0 || c2 < 0) 689d53690fSGabriel Krisman Bertazi return -EINVAL; 699d53690fSGabriel Krisman Bertazi if (c1 != c2) 709d53690fSGabriel Krisman Bertazi return 1; 719d53690fSGabriel Krisman Bertazi } while (c1); 729d53690fSGabriel Krisman Bertazi 739d53690fSGabriel Krisman Bertazi return 0; 749d53690fSGabriel Krisman Bertazi } 759d53690fSGabriel Krisman Bertazi EXPORT_SYMBOL(utf8_strncasecmp); 769d53690fSGabriel Krisman Bertazi 773ae72562SGabriel Krisman Bertazi /* String cf is expected to be a valid UTF-8 casefolded 783ae72562SGabriel Krisman Bertazi * string. 793ae72562SGabriel Krisman Bertazi */ 803ae72562SGabriel Krisman Bertazi int utf8_strncasecmp_folded(const struct unicode_map *um, 813ae72562SGabriel Krisman Bertazi const struct qstr *cf, 823ae72562SGabriel Krisman Bertazi const struct qstr *s1) 833ae72562SGabriel Krisman Bertazi { 843ae72562SGabriel Krisman Bertazi const struct utf8data *data = utf8nfdicf(um->version); 853ae72562SGabriel Krisman Bertazi struct utf8cursor cur1; 863ae72562SGabriel Krisman Bertazi int c1, c2; 873ae72562SGabriel Krisman Bertazi int i = 0; 883ae72562SGabriel Krisman Bertazi 893ae72562SGabriel Krisman Bertazi if (utf8ncursor(&cur1, data, s1->name, s1->len) < 0) 903ae72562SGabriel Krisman Bertazi return -EINVAL; 913ae72562SGabriel Krisman Bertazi 923ae72562SGabriel Krisman Bertazi do { 933ae72562SGabriel Krisman Bertazi c1 = utf8byte(&cur1); 943ae72562SGabriel Krisman Bertazi c2 = cf->name[i++]; 953ae72562SGabriel Krisman Bertazi if (c1 < 0) 963ae72562SGabriel Krisman Bertazi return -EINVAL; 973ae72562SGabriel Krisman Bertazi if (c1 != c2) 983ae72562SGabriel Krisman Bertazi return 1; 993ae72562SGabriel Krisman Bertazi } while (c1); 1003ae72562SGabriel Krisman Bertazi 1013ae72562SGabriel Krisman Bertazi return 0; 1023ae72562SGabriel Krisman Bertazi } 1033ae72562SGabriel Krisman Bertazi EXPORT_SYMBOL(utf8_strncasecmp_folded); 1043ae72562SGabriel Krisman Bertazi 1059d53690fSGabriel Krisman Bertazi int utf8_casefold(const struct unicode_map *um, const struct qstr *str, 1069d53690fSGabriel Krisman Bertazi unsigned char *dest, size_t dlen) 1079d53690fSGabriel Krisman Bertazi { 1089d53690fSGabriel Krisman Bertazi const struct utf8data *data = utf8nfdicf(um->version); 1099d53690fSGabriel Krisman Bertazi struct utf8cursor cur; 1109d53690fSGabriel Krisman Bertazi size_t nlen = 0; 1119d53690fSGabriel Krisman Bertazi 1129d53690fSGabriel Krisman Bertazi if (utf8ncursor(&cur, data, str->name, str->len) < 0) 1139d53690fSGabriel Krisman Bertazi return -EINVAL; 1149d53690fSGabriel Krisman Bertazi 1159d53690fSGabriel Krisman Bertazi for (nlen = 0; nlen < dlen; nlen++) { 1169d53690fSGabriel Krisman Bertazi int c = utf8byte(&cur); 1179d53690fSGabriel Krisman Bertazi 1189d53690fSGabriel Krisman Bertazi dest[nlen] = c; 1199d53690fSGabriel Krisman Bertazi if (!c) 1209d53690fSGabriel Krisman Bertazi return nlen; 1219d53690fSGabriel Krisman Bertazi if (c == -1) 1229d53690fSGabriel Krisman Bertazi break; 1239d53690fSGabriel Krisman Bertazi } 1249d53690fSGabriel Krisman Bertazi return -EINVAL; 1259d53690fSGabriel Krisman Bertazi } 1269d53690fSGabriel Krisman Bertazi EXPORT_SYMBOL(utf8_casefold); 1279d53690fSGabriel Krisman Bertazi 1283d7bfea8SDaniel Rosenberg int utf8_casefold_hash(const struct unicode_map *um, const void *salt, 1293d7bfea8SDaniel Rosenberg struct qstr *str) 1303d7bfea8SDaniel Rosenberg { 1313d7bfea8SDaniel Rosenberg const struct utf8data *data = utf8nfdicf(um->version); 1323d7bfea8SDaniel Rosenberg struct utf8cursor cur; 1333d7bfea8SDaniel Rosenberg int c; 1343d7bfea8SDaniel Rosenberg unsigned long hash = init_name_hash(salt); 1353d7bfea8SDaniel Rosenberg 1363d7bfea8SDaniel Rosenberg if (utf8ncursor(&cur, data, str->name, str->len) < 0) 1373d7bfea8SDaniel Rosenberg return -EINVAL; 1383d7bfea8SDaniel Rosenberg 1393d7bfea8SDaniel Rosenberg while ((c = utf8byte(&cur))) { 1403d7bfea8SDaniel Rosenberg if (c < 0) 1413d7bfea8SDaniel Rosenberg return -EINVAL; 1423d7bfea8SDaniel Rosenberg hash = partial_name_hash((unsigned char)c, hash); 1433d7bfea8SDaniel Rosenberg } 1443d7bfea8SDaniel Rosenberg str->hash = end_name_hash(hash); 1453d7bfea8SDaniel Rosenberg return 0; 1463d7bfea8SDaniel Rosenberg } 1473d7bfea8SDaniel Rosenberg EXPORT_SYMBOL(utf8_casefold_hash); 1483d7bfea8SDaniel Rosenberg 1499d53690fSGabriel Krisman Bertazi int utf8_normalize(const struct unicode_map *um, const struct qstr *str, 1509d53690fSGabriel Krisman Bertazi unsigned char *dest, size_t dlen) 1519d53690fSGabriel Krisman Bertazi { 1529d53690fSGabriel Krisman Bertazi const struct utf8data *data = utf8nfdi(um->version); 1539d53690fSGabriel Krisman Bertazi struct utf8cursor cur; 1549d53690fSGabriel Krisman Bertazi ssize_t nlen = 0; 1559d53690fSGabriel Krisman Bertazi 1569d53690fSGabriel Krisman Bertazi if (utf8ncursor(&cur, data, str->name, str->len) < 0) 1579d53690fSGabriel Krisman Bertazi return -EINVAL; 1589d53690fSGabriel Krisman Bertazi 1599d53690fSGabriel Krisman Bertazi for (nlen = 0; nlen < dlen; nlen++) { 1609d53690fSGabriel Krisman Bertazi int c = utf8byte(&cur); 1619d53690fSGabriel Krisman Bertazi 1629d53690fSGabriel Krisman Bertazi dest[nlen] = c; 1639d53690fSGabriel Krisman Bertazi if (!c) 1649d53690fSGabriel Krisman Bertazi return nlen; 1659d53690fSGabriel Krisman Bertazi if (c == -1) 1669d53690fSGabriel Krisman Bertazi break; 1679d53690fSGabriel Krisman Bertazi } 1689d53690fSGabriel Krisman Bertazi return -EINVAL; 1699d53690fSGabriel Krisman Bertazi } 1709d53690fSGabriel Krisman Bertazi EXPORT_SYMBOL(utf8_normalize); 1719d53690fSGabriel Krisman Bertazi 172*49bd03ccSChristoph Hellwig struct unicode_map *utf8_load(unsigned int version) 1739d53690fSGabriel Krisman Bertazi { 174*49bd03ccSChristoph Hellwig struct unicode_map *um; 1759d53690fSGabriel Krisman Bertazi 176*49bd03ccSChristoph Hellwig if (!utf8version_is_supported(version)) 1779d53690fSGabriel Krisman Bertazi return ERR_PTR(-EINVAL); 1789d53690fSGabriel Krisman Bertazi 1799d53690fSGabriel Krisman Bertazi um = kzalloc(sizeof(struct unicode_map), GFP_KERNEL); 1809d53690fSGabriel Krisman Bertazi if (!um) 1819d53690fSGabriel Krisman Bertazi return ERR_PTR(-ENOMEM); 182*49bd03ccSChristoph Hellwig um->version = version; 1839d53690fSGabriel Krisman Bertazi return um; 1849d53690fSGabriel Krisman Bertazi } 1859d53690fSGabriel Krisman Bertazi EXPORT_SYMBOL(utf8_load); 1869d53690fSGabriel Krisman Bertazi 1879d53690fSGabriel Krisman Bertazi void utf8_unload(struct unicode_map *um) 1889d53690fSGabriel Krisman Bertazi { 1899d53690fSGabriel Krisman Bertazi kfree(um); 1909d53690fSGabriel Krisman Bertazi } 1919d53690fSGabriel Krisman Bertazi EXPORT_SYMBOL(utf8_unload); 1929d53690fSGabriel Krisman Bertazi 1939d53690fSGabriel Krisman Bertazi MODULE_LICENSE("GPL v2"); 194