19d53690fSGabriel Krisman Bertazi /* SPDX-License-Identifier: GPL-2.0 */ 29d53690fSGabriel Krisman Bertazi #include <linux/module.h> 39d53690fSGabriel Krisman Bertazi #include <linux/kernel.h> 49d53690fSGabriel Krisman Bertazi #include <linux/string.h> 59d53690fSGabriel Krisman Bertazi #include <linux/slab.h> 69d53690fSGabriel Krisman Bertazi #include <linux/parser.h> 79d53690fSGabriel Krisman Bertazi #include <linux/errno.h> 89d53690fSGabriel Krisman Bertazi #include <linux/unicode.h> 99d53690fSGabriel Krisman Bertazi 109d53690fSGabriel Krisman Bertazi #include "utf8n.h" 119d53690fSGabriel Krisman Bertazi 129d53690fSGabriel Krisman Bertazi int utf8_validate(const struct unicode_map *um, const struct qstr *str) 139d53690fSGabriel Krisman Bertazi { 149d53690fSGabriel Krisman Bertazi const struct utf8data *data = utf8nfdi(um->version); 159d53690fSGabriel Krisman Bertazi 169d53690fSGabriel Krisman Bertazi if (utf8nlen(data, str->name, str->len) < 0) 179d53690fSGabriel Krisman Bertazi return -1; 189d53690fSGabriel Krisman Bertazi return 0; 199d53690fSGabriel Krisman Bertazi } 209d53690fSGabriel Krisman Bertazi EXPORT_SYMBOL(utf8_validate); 219d53690fSGabriel Krisman Bertazi 229d53690fSGabriel Krisman Bertazi int utf8_strncmp(const struct unicode_map *um, 239d53690fSGabriel Krisman Bertazi const struct qstr *s1, const struct qstr *s2) 249d53690fSGabriel Krisman Bertazi { 259d53690fSGabriel Krisman Bertazi const struct utf8data *data = utf8nfdi(um->version); 269d53690fSGabriel Krisman Bertazi struct utf8cursor cur1, cur2; 279d53690fSGabriel Krisman Bertazi int c1, c2; 289d53690fSGabriel Krisman Bertazi 299d53690fSGabriel Krisman Bertazi if (utf8ncursor(&cur1, data, s1->name, s1->len) < 0) 309d53690fSGabriel Krisman Bertazi return -EINVAL; 319d53690fSGabriel Krisman Bertazi 329d53690fSGabriel Krisman Bertazi if (utf8ncursor(&cur2, data, s2->name, s2->len) < 0) 339d53690fSGabriel Krisman Bertazi return -EINVAL; 349d53690fSGabriel Krisman Bertazi 359d53690fSGabriel Krisman Bertazi do { 369d53690fSGabriel Krisman Bertazi c1 = utf8byte(&cur1); 379d53690fSGabriel Krisman Bertazi c2 = utf8byte(&cur2); 389d53690fSGabriel Krisman Bertazi 399d53690fSGabriel Krisman Bertazi if (c1 < 0 || c2 < 0) 409d53690fSGabriel Krisman Bertazi return -EINVAL; 419d53690fSGabriel Krisman Bertazi if (c1 != c2) 429d53690fSGabriel Krisman Bertazi return 1; 439d53690fSGabriel Krisman Bertazi } while (c1); 449d53690fSGabriel Krisman Bertazi 459d53690fSGabriel Krisman Bertazi return 0; 469d53690fSGabriel Krisman Bertazi } 479d53690fSGabriel Krisman Bertazi EXPORT_SYMBOL(utf8_strncmp); 489d53690fSGabriel Krisman Bertazi 499d53690fSGabriel Krisman Bertazi int utf8_strncasecmp(const struct unicode_map *um, 509d53690fSGabriel Krisman Bertazi const struct qstr *s1, const struct qstr *s2) 519d53690fSGabriel Krisman Bertazi { 529d53690fSGabriel Krisman Bertazi const struct utf8data *data = utf8nfdicf(um->version); 539d53690fSGabriel Krisman Bertazi struct utf8cursor cur1, cur2; 549d53690fSGabriel Krisman Bertazi int c1, c2; 559d53690fSGabriel Krisman Bertazi 569d53690fSGabriel Krisman Bertazi if (utf8ncursor(&cur1, data, s1->name, s1->len) < 0) 579d53690fSGabriel Krisman Bertazi return -EINVAL; 589d53690fSGabriel Krisman Bertazi 599d53690fSGabriel Krisman Bertazi if (utf8ncursor(&cur2, data, s2->name, s2->len) < 0) 609d53690fSGabriel Krisman Bertazi return -EINVAL; 619d53690fSGabriel Krisman Bertazi 629d53690fSGabriel Krisman Bertazi do { 639d53690fSGabriel Krisman Bertazi c1 = utf8byte(&cur1); 649d53690fSGabriel Krisman Bertazi c2 = utf8byte(&cur2); 659d53690fSGabriel Krisman Bertazi 669d53690fSGabriel Krisman Bertazi if (c1 < 0 || c2 < 0) 679d53690fSGabriel Krisman Bertazi return -EINVAL; 689d53690fSGabriel Krisman Bertazi if (c1 != c2) 699d53690fSGabriel Krisman Bertazi return 1; 709d53690fSGabriel Krisman Bertazi } while (c1); 719d53690fSGabriel Krisman Bertazi 729d53690fSGabriel Krisman Bertazi return 0; 739d53690fSGabriel Krisman Bertazi } 749d53690fSGabriel Krisman Bertazi EXPORT_SYMBOL(utf8_strncasecmp); 759d53690fSGabriel Krisman Bertazi 763ae72562SGabriel Krisman Bertazi /* String cf is expected to be a valid UTF-8 casefolded 773ae72562SGabriel Krisman Bertazi * string. 783ae72562SGabriel Krisman Bertazi */ 793ae72562SGabriel Krisman Bertazi int utf8_strncasecmp_folded(const struct unicode_map *um, 803ae72562SGabriel Krisman Bertazi const struct qstr *cf, 813ae72562SGabriel Krisman Bertazi const struct qstr *s1) 823ae72562SGabriel Krisman Bertazi { 833ae72562SGabriel Krisman Bertazi const struct utf8data *data = utf8nfdicf(um->version); 843ae72562SGabriel Krisman Bertazi struct utf8cursor cur1; 853ae72562SGabriel Krisman Bertazi int c1, c2; 863ae72562SGabriel Krisman Bertazi int i = 0; 873ae72562SGabriel Krisman Bertazi 883ae72562SGabriel Krisman Bertazi if (utf8ncursor(&cur1, data, s1->name, s1->len) < 0) 893ae72562SGabriel Krisman Bertazi return -EINVAL; 903ae72562SGabriel Krisman Bertazi 913ae72562SGabriel Krisman Bertazi do { 923ae72562SGabriel Krisman Bertazi c1 = utf8byte(&cur1); 933ae72562SGabriel Krisman Bertazi c2 = cf->name[i++]; 943ae72562SGabriel Krisman Bertazi if (c1 < 0) 953ae72562SGabriel Krisman Bertazi return -EINVAL; 963ae72562SGabriel Krisman Bertazi if (c1 != c2) 973ae72562SGabriel Krisman Bertazi return 1; 983ae72562SGabriel Krisman Bertazi } while (c1); 993ae72562SGabriel Krisman Bertazi 1003ae72562SGabriel Krisman Bertazi return 0; 1013ae72562SGabriel Krisman Bertazi } 1023ae72562SGabriel Krisman Bertazi EXPORT_SYMBOL(utf8_strncasecmp_folded); 1033ae72562SGabriel Krisman Bertazi 1049d53690fSGabriel Krisman Bertazi int utf8_casefold(const struct unicode_map *um, const struct qstr *str, 1059d53690fSGabriel Krisman Bertazi unsigned char *dest, size_t dlen) 1069d53690fSGabriel Krisman Bertazi { 1079d53690fSGabriel Krisman Bertazi const struct utf8data *data = utf8nfdicf(um->version); 1089d53690fSGabriel Krisman Bertazi struct utf8cursor cur; 1099d53690fSGabriel Krisman Bertazi size_t nlen = 0; 1109d53690fSGabriel Krisman Bertazi 1119d53690fSGabriel Krisman Bertazi if (utf8ncursor(&cur, data, str->name, str->len) < 0) 1129d53690fSGabriel Krisman Bertazi return -EINVAL; 1139d53690fSGabriel Krisman Bertazi 1149d53690fSGabriel Krisman Bertazi for (nlen = 0; nlen < dlen; nlen++) { 1159d53690fSGabriel Krisman Bertazi int c = utf8byte(&cur); 1169d53690fSGabriel Krisman Bertazi 1179d53690fSGabriel Krisman Bertazi dest[nlen] = c; 1189d53690fSGabriel Krisman Bertazi if (!c) 1199d53690fSGabriel Krisman Bertazi return nlen; 1209d53690fSGabriel Krisman Bertazi if (c == -1) 1219d53690fSGabriel Krisman Bertazi break; 1229d53690fSGabriel Krisman Bertazi } 1239d53690fSGabriel Krisman Bertazi return -EINVAL; 1249d53690fSGabriel Krisman Bertazi } 1259d53690fSGabriel Krisman Bertazi 1269d53690fSGabriel Krisman Bertazi EXPORT_SYMBOL(utf8_casefold); 1279d53690fSGabriel Krisman Bertazi 1289d53690fSGabriel Krisman Bertazi int utf8_normalize(const struct unicode_map *um, const struct qstr *str, 1299d53690fSGabriel Krisman Bertazi unsigned char *dest, size_t dlen) 1309d53690fSGabriel Krisman Bertazi { 1319d53690fSGabriel Krisman Bertazi const struct utf8data *data = utf8nfdi(um->version); 1329d53690fSGabriel Krisman Bertazi struct utf8cursor cur; 1339d53690fSGabriel Krisman Bertazi ssize_t nlen = 0; 1349d53690fSGabriel Krisman Bertazi 1359d53690fSGabriel Krisman Bertazi if (utf8ncursor(&cur, data, str->name, str->len) < 0) 1369d53690fSGabriel Krisman Bertazi return -EINVAL; 1379d53690fSGabriel Krisman Bertazi 1389d53690fSGabriel Krisman Bertazi for (nlen = 0; nlen < dlen; nlen++) { 1399d53690fSGabriel Krisman Bertazi int c = utf8byte(&cur); 1409d53690fSGabriel Krisman Bertazi 1419d53690fSGabriel Krisman Bertazi dest[nlen] = c; 1429d53690fSGabriel Krisman Bertazi if (!c) 1439d53690fSGabriel Krisman Bertazi return nlen; 1449d53690fSGabriel Krisman Bertazi if (c == -1) 1459d53690fSGabriel Krisman Bertazi break; 1469d53690fSGabriel Krisman Bertazi } 1479d53690fSGabriel Krisman Bertazi return -EINVAL; 1489d53690fSGabriel Krisman Bertazi } 1499d53690fSGabriel Krisman Bertazi 1509d53690fSGabriel Krisman Bertazi EXPORT_SYMBOL(utf8_normalize); 1519d53690fSGabriel Krisman Bertazi 1529d53690fSGabriel Krisman Bertazi static int utf8_parse_version(const char *version, unsigned int *maj, 1539d53690fSGabriel Krisman Bertazi unsigned int *min, unsigned int *rev) 1549d53690fSGabriel Krisman Bertazi { 1559d53690fSGabriel Krisman Bertazi substring_t args[3]; 1569d53690fSGabriel Krisman Bertazi char version_string[12]; 1579d53690fSGabriel Krisman Bertazi const struct match_token token[] = { 1589d53690fSGabriel Krisman Bertazi {1, "%d.%d.%d"}, 1599d53690fSGabriel Krisman Bertazi {0, NULL} 1609d53690fSGabriel Krisman Bertazi }; 1619d53690fSGabriel Krisman Bertazi 1629d53690fSGabriel Krisman Bertazi strncpy(version_string, version, sizeof(version_string)); 1639d53690fSGabriel Krisman Bertazi 1649d53690fSGabriel Krisman Bertazi if (match_token(version_string, token, args) != 1) 1659d53690fSGabriel Krisman Bertazi return -EINVAL; 1669d53690fSGabriel Krisman Bertazi 1679d53690fSGabriel Krisman Bertazi if (match_int(&args[0], maj) || match_int(&args[1], min) || 1689d53690fSGabriel Krisman Bertazi match_int(&args[2], rev)) 1699d53690fSGabriel Krisman Bertazi return -EINVAL; 1709d53690fSGabriel Krisman Bertazi 1719d53690fSGabriel Krisman Bertazi return 0; 1729d53690fSGabriel Krisman Bertazi } 1739d53690fSGabriel Krisman Bertazi 1749d53690fSGabriel Krisman Bertazi struct unicode_map *utf8_load(const char *version) 1759d53690fSGabriel Krisman Bertazi { 1769d53690fSGabriel Krisman Bertazi struct unicode_map *um = NULL; 1779d53690fSGabriel Krisman Bertazi int unicode_version; 1789d53690fSGabriel Krisman Bertazi 1799d53690fSGabriel Krisman Bertazi if (version) { 1809d53690fSGabriel Krisman Bertazi unsigned int maj, min, rev; 1819d53690fSGabriel Krisman Bertazi 1829d53690fSGabriel Krisman Bertazi if (utf8_parse_version(version, &maj, &min, &rev) < 0) 1839d53690fSGabriel Krisman Bertazi return ERR_PTR(-EINVAL); 1849d53690fSGabriel Krisman Bertazi 1859d53690fSGabriel Krisman Bertazi if (!utf8version_is_supported(maj, min, rev)) 1869d53690fSGabriel Krisman Bertazi return ERR_PTR(-EINVAL); 1879d53690fSGabriel Krisman Bertazi 1889d53690fSGabriel Krisman Bertazi unicode_version = UNICODE_AGE(maj, min, rev); 1899d53690fSGabriel Krisman Bertazi } else { 1909d53690fSGabriel Krisman Bertazi unicode_version = utf8version_latest(); 1919d53690fSGabriel Krisman Bertazi printk(KERN_WARNING"UTF-8 version not specified. " 1929d53690fSGabriel Krisman Bertazi "Assuming latest supported version (%d.%d.%d).", 1939d53690fSGabriel Krisman Bertazi (unicode_version >> 16) & 0xff, 1949d53690fSGabriel Krisman Bertazi (unicode_version >> 8) & 0xff, 1959d53690fSGabriel Krisman Bertazi (unicode_version & 0xff)); 1969d53690fSGabriel Krisman Bertazi } 1979d53690fSGabriel Krisman Bertazi 1989d53690fSGabriel Krisman Bertazi um = kzalloc(sizeof(struct unicode_map), GFP_KERNEL); 1999d53690fSGabriel Krisman Bertazi if (!um) 2009d53690fSGabriel Krisman Bertazi return ERR_PTR(-ENOMEM); 2019d53690fSGabriel Krisman Bertazi 2029d53690fSGabriel Krisman Bertazi um->charset = "UTF-8"; 2039d53690fSGabriel Krisman Bertazi um->version = unicode_version; 2049d53690fSGabriel Krisman Bertazi 2059d53690fSGabriel Krisman Bertazi return um; 2069d53690fSGabriel Krisman Bertazi } 2079d53690fSGabriel Krisman Bertazi EXPORT_SYMBOL(utf8_load); 2089d53690fSGabriel Krisman Bertazi 2099d53690fSGabriel Krisman Bertazi void utf8_unload(struct unicode_map *um) 2109d53690fSGabriel Krisman Bertazi { 2119d53690fSGabriel Krisman Bertazi kfree(um); 2129d53690fSGabriel Krisman Bertazi } 2139d53690fSGabriel Krisman Bertazi EXPORT_SYMBOL(utf8_unload); 2149d53690fSGabriel Krisman Bertazi 2159d53690fSGabriel Krisman Bertazi MODULE_LICENSE("GPL v2"); 216