19d53690fSGabriel Krisman Bertazi /* SPDX-License-Identifier: GPL-2.0 */ 29d53690fSGabriel Krisman Bertazi #include <linux/module.h> 39d53690fSGabriel Krisman Bertazi #include <linux/kernel.h> 49d53690fSGabriel Krisman Bertazi #include <linux/string.h> 59d53690fSGabriel Krisman Bertazi #include <linux/slab.h> 69d53690fSGabriel Krisman Bertazi #include <linux/parser.h> 79d53690fSGabriel Krisman Bertazi #include <linux/errno.h> 83d7bfea8SDaniel Rosenberg #include <linux/stringhash.h> 99d53690fSGabriel Krisman Bertazi 109d53690fSGabriel Krisman Bertazi #include "utf8n.h" 119d53690fSGabriel Krisman Bertazi 129d53690fSGabriel Krisman Bertazi int utf8_validate(const struct unicode_map *um, const struct qstr *str) 139d53690fSGabriel Krisman Bertazi { 146ca99ce7SChristoph Hellwig if (utf8nlen(um, UTF8_NFDI, str->name, str->len) < 0) 159d53690fSGabriel Krisman Bertazi return -1; 169d53690fSGabriel Krisman Bertazi return 0; 179d53690fSGabriel Krisman Bertazi } 189d53690fSGabriel Krisman Bertazi EXPORT_SYMBOL(utf8_validate); 199d53690fSGabriel Krisman Bertazi 209d53690fSGabriel Krisman Bertazi int utf8_strncmp(const struct unicode_map *um, 219d53690fSGabriel Krisman Bertazi const struct qstr *s1, const struct qstr *s2) 229d53690fSGabriel Krisman Bertazi { 239d53690fSGabriel Krisman Bertazi struct utf8cursor cur1, cur2; 249d53690fSGabriel Krisman Bertazi int c1, c2; 259d53690fSGabriel Krisman Bertazi 266ca99ce7SChristoph Hellwig if (utf8ncursor(&cur1, um, UTF8_NFDI, s1->name, s1->len) < 0) 279d53690fSGabriel Krisman Bertazi return -EINVAL; 289d53690fSGabriel Krisman Bertazi 296ca99ce7SChristoph Hellwig if (utf8ncursor(&cur2, um, UTF8_NFDI, s2->name, s2->len) < 0) 309d53690fSGabriel Krisman Bertazi return -EINVAL; 319d53690fSGabriel Krisman Bertazi 329d53690fSGabriel Krisman Bertazi do { 339d53690fSGabriel Krisman Bertazi c1 = utf8byte(&cur1); 349d53690fSGabriel Krisman Bertazi c2 = utf8byte(&cur2); 359d53690fSGabriel Krisman Bertazi 369d53690fSGabriel Krisman Bertazi if (c1 < 0 || c2 < 0) 379d53690fSGabriel Krisman Bertazi return -EINVAL; 389d53690fSGabriel Krisman Bertazi if (c1 != c2) 399d53690fSGabriel Krisman Bertazi return 1; 409d53690fSGabriel Krisman Bertazi } while (c1); 419d53690fSGabriel Krisman Bertazi 429d53690fSGabriel Krisman Bertazi return 0; 439d53690fSGabriel Krisman Bertazi } 449d53690fSGabriel Krisman Bertazi EXPORT_SYMBOL(utf8_strncmp); 459d53690fSGabriel Krisman Bertazi 469d53690fSGabriel Krisman Bertazi int utf8_strncasecmp(const struct unicode_map *um, 479d53690fSGabriel Krisman Bertazi const struct qstr *s1, const struct qstr *s2) 489d53690fSGabriel Krisman Bertazi { 499d53690fSGabriel Krisman Bertazi struct utf8cursor cur1, cur2; 509d53690fSGabriel Krisman Bertazi int c1, c2; 519d53690fSGabriel Krisman Bertazi 526ca99ce7SChristoph Hellwig if (utf8ncursor(&cur1, um, UTF8_NFDICF, s1->name, s1->len) < 0) 539d53690fSGabriel Krisman Bertazi return -EINVAL; 549d53690fSGabriel Krisman Bertazi 556ca99ce7SChristoph Hellwig if (utf8ncursor(&cur2, um, UTF8_NFDICF, s2->name, s2->len) < 0) 569d53690fSGabriel Krisman Bertazi return -EINVAL; 579d53690fSGabriel Krisman Bertazi 589d53690fSGabriel Krisman Bertazi do { 599d53690fSGabriel Krisman Bertazi c1 = utf8byte(&cur1); 609d53690fSGabriel Krisman Bertazi c2 = utf8byte(&cur2); 619d53690fSGabriel Krisman Bertazi 629d53690fSGabriel Krisman Bertazi if (c1 < 0 || c2 < 0) 639d53690fSGabriel Krisman Bertazi return -EINVAL; 649d53690fSGabriel Krisman Bertazi if (c1 != c2) 659d53690fSGabriel Krisman Bertazi return 1; 669d53690fSGabriel Krisman Bertazi } while (c1); 679d53690fSGabriel Krisman Bertazi 689d53690fSGabriel Krisman Bertazi return 0; 699d53690fSGabriel Krisman Bertazi } 709d53690fSGabriel Krisman Bertazi EXPORT_SYMBOL(utf8_strncasecmp); 719d53690fSGabriel Krisman Bertazi 723ae72562SGabriel Krisman Bertazi /* String cf is expected to be a valid UTF-8 casefolded 733ae72562SGabriel Krisman Bertazi * string. 743ae72562SGabriel Krisman Bertazi */ 753ae72562SGabriel Krisman Bertazi int utf8_strncasecmp_folded(const struct unicode_map *um, 763ae72562SGabriel Krisman Bertazi const struct qstr *cf, 773ae72562SGabriel Krisman Bertazi const struct qstr *s1) 783ae72562SGabriel Krisman Bertazi { 793ae72562SGabriel Krisman Bertazi struct utf8cursor cur1; 803ae72562SGabriel Krisman Bertazi int c1, c2; 813ae72562SGabriel Krisman Bertazi int i = 0; 823ae72562SGabriel Krisman Bertazi 836ca99ce7SChristoph Hellwig if (utf8ncursor(&cur1, um, UTF8_NFDICF, s1->name, s1->len) < 0) 843ae72562SGabriel Krisman Bertazi return -EINVAL; 853ae72562SGabriel Krisman Bertazi 863ae72562SGabriel Krisman Bertazi do { 873ae72562SGabriel Krisman Bertazi c1 = utf8byte(&cur1); 883ae72562SGabriel Krisman Bertazi c2 = cf->name[i++]; 893ae72562SGabriel Krisman Bertazi if (c1 < 0) 903ae72562SGabriel Krisman Bertazi return -EINVAL; 913ae72562SGabriel Krisman Bertazi if (c1 != c2) 923ae72562SGabriel Krisman Bertazi return 1; 933ae72562SGabriel Krisman Bertazi } while (c1); 943ae72562SGabriel Krisman Bertazi 953ae72562SGabriel Krisman Bertazi return 0; 963ae72562SGabriel Krisman Bertazi } 973ae72562SGabriel Krisman Bertazi EXPORT_SYMBOL(utf8_strncasecmp_folded); 983ae72562SGabriel Krisman Bertazi 999d53690fSGabriel Krisman Bertazi int utf8_casefold(const struct unicode_map *um, const struct qstr *str, 1009d53690fSGabriel Krisman Bertazi unsigned char *dest, size_t dlen) 1019d53690fSGabriel Krisman Bertazi { 1029d53690fSGabriel Krisman Bertazi struct utf8cursor cur; 1039d53690fSGabriel Krisman Bertazi size_t nlen = 0; 1049d53690fSGabriel Krisman Bertazi 1056ca99ce7SChristoph Hellwig if (utf8ncursor(&cur, um, UTF8_NFDICF, str->name, str->len) < 0) 1069d53690fSGabriel Krisman Bertazi return -EINVAL; 1079d53690fSGabriel Krisman Bertazi 1089d53690fSGabriel Krisman Bertazi for (nlen = 0; nlen < dlen; nlen++) { 1099d53690fSGabriel Krisman Bertazi int c = utf8byte(&cur); 1109d53690fSGabriel Krisman Bertazi 1119d53690fSGabriel Krisman Bertazi dest[nlen] = c; 1129d53690fSGabriel Krisman Bertazi if (!c) 1139d53690fSGabriel Krisman Bertazi return nlen; 1149d53690fSGabriel Krisman Bertazi if (c == -1) 1159d53690fSGabriel Krisman Bertazi break; 1169d53690fSGabriel Krisman Bertazi } 1179d53690fSGabriel Krisman Bertazi return -EINVAL; 1189d53690fSGabriel Krisman Bertazi } 1199d53690fSGabriel Krisman Bertazi EXPORT_SYMBOL(utf8_casefold); 1209d53690fSGabriel Krisman Bertazi 1213d7bfea8SDaniel Rosenberg int utf8_casefold_hash(const struct unicode_map *um, const void *salt, 1223d7bfea8SDaniel Rosenberg struct qstr *str) 1233d7bfea8SDaniel Rosenberg { 1243d7bfea8SDaniel Rosenberg struct utf8cursor cur; 1253d7bfea8SDaniel Rosenberg int c; 1263d7bfea8SDaniel Rosenberg unsigned long hash = init_name_hash(salt); 1273d7bfea8SDaniel Rosenberg 1286ca99ce7SChristoph Hellwig if (utf8ncursor(&cur, um, UTF8_NFDICF, str->name, str->len) < 0) 1293d7bfea8SDaniel Rosenberg return -EINVAL; 1303d7bfea8SDaniel Rosenberg 1313d7bfea8SDaniel Rosenberg while ((c = utf8byte(&cur))) { 1323d7bfea8SDaniel Rosenberg if (c < 0) 1333d7bfea8SDaniel Rosenberg return -EINVAL; 1343d7bfea8SDaniel Rosenberg hash = partial_name_hash((unsigned char)c, hash); 1353d7bfea8SDaniel Rosenberg } 1363d7bfea8SDaniel Rosenberg str->hash = end_name_hash(hash); 1373d7bfea8SDaniel Rosenberg return 0; 1383d7bfea8SDaniel Rosenberg } 1393d7bfea8SDaniel Rosenberg EXPORT_SYMBOL(utf8_casefold_hash); 1403d7bfea8SDaniel Rosenberg 1419d53690fSGabriel Krisman Bertazi int utf8_normalize(const struct unicode_map *um, const struct qstr *str, 1429d53690fSGabriel Krisman Bertazi unsigned char *dest, size_t dlen) 1439d53690fSGabriel Krisman Bertazi { 1449d53690fSGabriel Krisman Bertazi struct utf8cursor cur; 1459d53690fSGabriel Krisman Bertazi ssize_t nlen = 0; 1469d53690fSGabriel Krisman Bertazi 1476ca99ce7SChristoph Hellwig if (utf8ncursor(&cur, um, UTF8_NFDI, str->name, str->len) < 0) 1489d53690fSGabriel Krisman Bertazi return -EINVAL; 1499d53690fSGabriel Krisman Bertazi 1509d53690fSGabriel Krisman Bertazi for (nlen = 0; nlen < dlen; nlen++) { 1519d53690fSGabriel Krisman Bertazi int c = utf8byte(&cur); 1529d53690fSGabriel Krisman Bertazi 1539d53690fSGabriel Krisman Bertazi dest[nlen] = c; 1549d53690fSGabriel Krisman Bertazi if (!c) 1559d53690fSGabriel Krisman Bertazi return nlen; 1569d53690fSGabriel Krisman Bertazi if (c == -1) 1579d53690fSGabriel Krisman Bertazi break; 1589d53690fSGabriel Krisman Bertazi } 1599d53690fSGabriel Krisman Bertazi return -EINVAL; 1609d53690fSGabriel Krisman Bertazi } 1619d53690fSGabriel Krisman Bertazi EXPORT_SYMBOL(utf8_normalize); 1629d53690fSGabriel Krisman Bertazi 1632b3d0478SChristoph Hellwig static const struct utf8data *find_table_version(const struct utf8data *table, 1642b3d0478SChristoph Hellwig size_t nr_entries, unsigned int version) 1652b3d0478SChristoph Hellwig { 1662b3d0478SChristoph Hellwig size_t i = nr_entries - 1; 1672b3d0478SChristoph Hellwig 1682b3d0478SChristoph Hellwig while (version < table[i].maxage) 1692b3d0478SChristoph Hellwig i--; 1702b3d0478SChristoph Hellwig if (version > table[i].maxage) 1712b3d0478SChristoph Hellwig return NULL; 1722b3d0478SChristoph Hellwig return &table[i]; 1732b3d0478SChristoph Hellwig } 1742b3d0478SChristoph Hellwig 17549bd03ccSChristoph Hellwig struct unicode_map *utf8_load(unsigned int version) 1769d53690fSGabriel Krisman Bertazi { 17749bd03ccSChristoph Hellwig struct unicode_map *um; 1789d53690fSGabriel Krisman Bertazi 1799d53690fSGabriel Krisman Bertazi um = kzalloc(sizeof(struct unicode_map), GFP_KERNEL); 1809d53690fSGabriel Krisman Bertazi if (!um) 1819d53690fSGabriel Krisman Bertazi return ERR_PTR(-ENOMEM); 18249bd03ccSChristoph Hellwig um->version = version; 1832b3d0478SChristoph Hellwig 1842b3d0478SChristoph Hellwig um->tables = symbol_request(utf8_data_table); 1852b3d0478SChristoph Hellwig if (!um->tables) 1862b3d0478SChristoph Hellwig goto out_free_um; 1872b3d0478SChristoph Hellwig 1882b3d0478SChristoph Hellwig if (!utf8version_is_supported(um, version)) 1892b3d0478SChristoph Hellwig goto out_symbol_put; 1902b3d0478SChristoph Hellwig um->ntab[UTF8_NFDI] = find_table_version(um->tables->utf8nfdidata, 1912b3d0478SChristoph Hellwig um->tables->utf8nfdidata_size, um->version); 1926ca99ce7SChristoph Hellwig if (!um->ntab[UTF8_NFDI]) 1932b3d0478SChristoph Hellwig goto out_symbol_put; 1942b3d0478SChristoph Hellwig um->ntab[UTF8_NFDICF] = find_table_version(um->tables->utf8nfdicfdata, 1952b3d0478SChristoph Hellwig um->tables->utf8nfdicfdata_size, um->version); 1966ca99ce7SChristoph Hellwig if (!um->ntab[UTF8_NFDICF]) 1972b3d0478SChristoph Hellwig goto out_symbol_put; 1989d53690fSGabriel Krisman Bertazi return um; 1996ca99ce7SChristoph Hellwig 2002b3d0478SChristoph Hellwig out_symbol_put: 201*c4b6c178SAndré Almeida symbol_put(utf8_data_table); 2026ca99ce7SChristoph Hellwig out_free_um: 2036ca99ce7SChristoph Hellwig kfree(um); 2046ca99ce7SChristoph Hellwig return ERR_PTR(-EINVAL); 2059d53690fSGabriel Krisman Bertazi } 2069d53690fSGabriel Krisman Bertazi EXPORT_SYMBOL(utf8_load); 2079d53690fSGabriel Krisman Bertazi 2089d53690fSGabriel Krisman Bertazi void utf8_unload(struct unicode_map *um) 2099d53690fSGabriel Krisman Bertazi { 2102b3d0478SChristoph Hellwig if (um) { 2112b3d0478SChristoph Hellwig symbol_put(utf8_data_table); 2129d53690fSGabriel Krisman Bertazi kfree(um); 2139d53690fSGabriel Krisman Bertazi } 2142b3d0478SChristoph Hellwig } 2159d53690fSGabriel Krisman Bertazi EXPORT_SYMBOL(utf8_unload); 2169d53690fSGabriel Krisman Bertazi 217