11da177e4SLinus Torvalds /* 21da177e4SLinus Torvalds * linux/fs/hfsplus/unicode.c 31da177e4SLinus Torvalds * 41da177e4SLinus Torvalds * Copyright (C) 2001 51da177e4SLinus Torvalds * Brad Boyer (flar@allandria.com) 61da177e4SLinus Torvalds * (C) 2003 Ardis Technologies <roman@ardistech.com> 71da177e4SLinus Torvalds * 81da177e4SLinus Torvalds * Handler routines for unicode strings 91da177e4SLinus Torvalds */ 101da177e4SLinus Torvalds 111da177e4SLinus Torvalds #include <linux/types.h> 121da177e4SLinus Torvalds #include <linux/nls.h> 131da177e4SLinus Torvalds #include "hfsplus_fs.h" 141da177e4SLinus Torvalds #include "hfsplus_raw.h" 151da177e4SLinus Torvalds 161da177e4SLinus Torvalds /* Fold the case of a unicode char, given the 16 bit value */ 171da177e4SLinus Torvalds /* Returns folded char, or 0 if ignorable */ 181da177e4SLinus Torvalds static inline u16 case_fold(u16 c) 191da177e4SLinus Torvalds { 201da177e4SLinus Torvalds u16 tmp; 211da177e4SLinus Torvalds 221da177e4SLinus Torvalds tmp = hfsplus_case_fold_table[c >> 8]; 231da177e4SLinus Torvalds if (tmp) 241da177e4SLinus Torvalds tmp = hfsplus_case_fold_table[tmp + (c & 0xff)]; 251da177e4SLinus Torvalds else 261da177e4SLinus Torvalds tmp = c; 271da177e4SLinus Torvalds return tmp; 281da177e4SLinus Torvalds } 291da177e4SLinus Torvalds 301da177e4SLinus Torvalds /* Compare unicode strings, return values like normal strcmp */ 312179d372SDavid Elliott int hfsplus_strcasecmp(const struct hfsplus_unistr *s1, 322179d372SDavid Elliott const struct hfsplus_unistr *s2) 331da177e4SLinus Torvalds { 341da177e4SLinus Torvalds u16 len1, len2, c1, c2; 351da177e4SLinus Torvalds const hfsplus_unichr *p1, *p2; 361da177e4SLinus Torvalds 371da177e4SLinus Torvalds len1 = be16_to_cpu(s1->length); 381da177e4SLinus Torvalds len2 = be16_to_cpu(s2->length); 391da177e4SLinus Torvalds p1 = s1->unicode; 401da177e4SLinus Torvalds p2 = s2->unicode; 411da177e4SLinus Torvalds 421da177e4SLinus Torvalds while (1) { 431da177e4SLinus Torvalds c1 = c2 = 0; 441da177e4SLinus Torvalds 451da177e4SLinus Torvalds while (len1 && !c1) { 461da177e4SLinus Torvalds c1 = case_fold(be16_to_cpu(*p1)); 471da177e4SLinus Torvalds p1++; 481da177e4SLinus Torvalds len1--; 491da177e4SLinus Torvalds } 501da177e4SLinus Torvalds while (len2 && !c2) { 511da177e4SLinus Torvalds c2 = case_fold(be16_to_cpu(*p2)); 521da177e4SLinus Torvalds p2++; 531da177e4SLinus Torvalds len2--; 541da177e4SLinus Torvalds } 551da177e4SLinus Torvalds 561da177e4SLinus Torvalds if (c1 != c2) 571da177e4SLinus Torvalds return (c1 < c2) ? -1 : 1; 581da177e4SLinus Torvalds if (!c1 && !c2) 591da177e4SLinus Torvalds return 0; 601da177e4SLinus Torvalds } 611da177e4SLinus Torvalds } 621da177e4SLinus Torvalds 632179d372SDavid Elliott /* Compare names as a sequence of 16-bit unsigned integers */ 642179d372SDavid Elliott int hfsplus_strcmp(const struct hfsplus_unistr *s1, 652179d372SDavid Elliott const struct hfsplus_unistr *s2) 662179d372SDavid Elliott { 672179d372SDavid Elliott u16 len1, len2, c1, c2; 682179d372SDavid Elliott const hfsplus_unichr *p1, *p2; 692179d372SDavid Elliott int len; 702179d372SDavid Elliott 712179d372SDavid Elliott len1 = be16_to_cpu(s1->length); 722179d372SDavid Elliott len2 = be16_to_cpu(s2->length); 732179d372SDavid Elliott p1 = s1->unicode; 742179d372SDavid Elliott p2 = s2->unicode; 752179d372SDavid Elliott 762179d372SDavid Elliott for (len = min(len1, len2); len > 0; len--) { 772179d372SDavid Elliott c1 = be16_to_cpu(*p1); 782179d372SDavid Elliott c2 = be16_to_cpu(*p2); 792179d372SDavid Elliott if (c1 != c2) 802179d372SDavid Elliott return c1 < c2 ? -1 : 1; 812179d372SDavid Elliott p1++; 822179d372SDavid Elliott p2++; 832179d372SDavid Elliott } 842179d372SDavid Elliott 852179d372SDavid Elliott return len1 < len2 ? -1 : 862179d372SDavid Elliott len1 > len2 ? 1 : 0; 872179d372SDavid Elliott } 882179d372SDavid Elliott 892179d372SDavid Elliott 901da177e4SLinus Torvalds #define Hangul_SBase 0xac00 911da177e4SLinus Torvalds #define Hangul_LBase 0x1100 921da177e4SLinus Torvalds #define Hangul_VBase 0x1161 931da177e4SLinus Torvalds #define Hangul_TBase 0x11a7 941da177e4SLinus Torvalds #define Hangul_SCount 11172 951da177e4SLinus Torvalds #define Hangul_LCount 19 961da177e4SLinus Torvalds #define Hangul_VCount 21 971da177e4SLinus Torvalds #define Hangul_TCount 28 981da177e4SLinus Torvalds #define Hangul_NCount (Hangul_VCount * Hangul_TCount) 991da177e4SLinus Torvalds 1001da177e4SLinus Torvalds 1011da177e4SLinus Torvalds static u16 *hfsplus_compose_lookup(u16 *p, u16 cc) 1021da177e4SLinus Torvalds { 1031da177e4SLinus Torvalds int i, s, e; 1041da177e4SLinus Torvalds 1051da177e4SLinus Torvalds s = 1; 1061da177e4SLinus Torvalds e = p[1]; 1071da177e4SLinus Torvalds if (!e || cc < p[s * 2] || cc > p[e * 2]) 1081da177e4SLinus Torvalds return NULL; 1091da177e4SLinus Torvalds do { 1101da177e4SLinus Torvalds i = (s + e) / 2; 1111da177e4SLinus Torvalds if (cc > p[i * 2]) 1121da177e4SLinus Torvalds s = i + 1; 1131da177e4SLinus Torvalds else if (cc < p[i * 2]) 1141da177e4SLinus Torvalds e = i - 1; 1151da177e4SLinus Torvalds else 1161da177e4SLinus Torvalds return hfsplus_compose_table + p[i * 2 + 1]; 1171da177e4SLinus Torvalds } while (s <= e); 1181da177e4SLinus Torvalds return NULL; 1191da177e4SLinus Torvalds } 1201da177e4SLinus Torvalds 1211da177e4SLinus Torvalds int hfsplus_uni2asc(struct super_block *sb, const struct hfsplus_unistr *ustr, char *astr, int *len_p) 1221da177e4SLinus Torvalds { 1231da177e4SLinus Torvalds const hfsplus_unichr *ip; 1241da177e4SLinus Torvalds struct nls_table *nls = HFSPLUS_SB(sb).nls; 1251da177e4SLinus Torvalds u8 *op; 1261da177e4SLinus Torvalds u16 cc, c0, c1; 1271da177e4SLinus Torvalds u16 *ce1, *ce2; 1281da177e4SLinus Torvalds int i, len, ustrlen, res, compose; 1291da177e4SLinus Torvalds 1301da177e4SLinus Torvalds op = astr; 1311da177e4SLinus Torvalds ip = ustr->unicode; 1321da177e4SLinus Torvalds ustrlen = be16_to_cpu(ustr->length); 1331da177e4SLinus Torvalds len = *len_p; 1341da177e4SLinus Torvalds ce1 = NULL; 1351da177e4SLinus Torvalds compose = !(HFSPLUS_SB(sb).flags & HFSPLUS_SB_NODECOMPOSE); 1361da177e4SLinus Torvalds 1371da177e4SLinus Torvalds while (ustrlen > 0) { 1381da177e4SLinus Torvalds c0 = be16_to_cpu(*ip++); 1391da177e4SLinus Torvalds ustrlen--; 1401da177e4SLinus Torvalds /* search for single decomposed char */ 1411da177e4SLinus Torvalds if (likely(compose)) 1421da177e4SLinus Torvalds ce1 = hfsplus_compose_lookup(hfsplus_compose_table, c0); 1431da177e4SLinus Torvalds if (ce1 && (cc = ce1[0])) { 1441da177e4SLinus Torvalds /* start of a possibly decomposed Hangul char */ 1451da177e4SLinus Torvalds if (cc != 0xffff) 1461da177e4SLinus Torvalds goto done; 1471da177e4SLinus Torvalds if (!ustrlen) 1481da177e4SLinus Torvalds goto same; 1491da177e4SLinus Torvalds c1 = be16_to_cpu(*ip) - Hangul_VBase; 1501da177e4SLinus Torvalds if (c1 < Hangul_VCount) { 1511da177e4SLinus Torvalds /* compose the Hangul char */ 1521da177e4SLinus Torvalds cc = (c0 - Hangul_LBase) * Hangul_VCount; 1531da177e4SLinus Torvalds cc = (cc + c1) * Hangul_TCount; 1541da177e4SLinus Torvalds cc += Hangul_SBase; 1551da177e4SLinus Torvalds ip++; 1561da177e4SLinus Torvalds ustrlen--; 1571da177e4SLinus Torvalds if (!ustrlen) 1581da177e4SLinus Torvalds goto done; 1591da177e4SLinus Torvalds c1 = be16_to_cpu(*ip) - Hangul_TBase; 1601da177e4SLinus Torvalds if (c1 > 0 && c1 < Hangul_TCount) { 1611da177e4SLinus Torvalds cc += c1; 1621da177e4SLinus Torvalds ip++; 1631da177e4SLinus Torvalds ustrlen--; 1641da177e4SLinus Torvalds } 1651da177e4SLinus Torvalds goto done; 1661da177e4SLinus Torvalds } 1671da177e4SLinus Torvalds } 1681da177e4SLinus Torvalds while (1) { 1691da177e4SLinus Torvalds /* main loop for common case of not composed chars */ 1701da177e4SLinus Torvalds if (!ustrlen) 1711da177e4SLinus Torvalds goto same; 1721da177e4SLinus Torvalds c1 = be16_to_cpu(*ip); 1731da177e4SLinus Torvalds if (likely(compose)) 1741da177e4SLinus Torvalds ce1 = hfsplus_compose_lookup(hfsplus_compose_table, c1); 1751da177e4SLinus Torvalds if (ce1) 1761da177e4SLinus Torvalds break; 1771da177e4SLinus Torvalds switch (c0) { 1781da177e4SLinus Torvalds case 0: 1791da177e4SLinus Torvalds c0 = 0x2400; 1801da177e4SLinus Torvalds break; 1811da177e4SLinus Torvalds case '/': 1821da177e4SLinus Torvalds c0 = ':'; 1831da177e4SLinus Torvalds break; 1841da177e4SLinus Torvalds } 1851da177e4SLinus Torvalds res = nls->uni2char(c0, op, len); 1861da177e4SLinus Torvalds if (res < 0) { 1871da177e4SLinus Torvalds if (res == -ENAMETOOLONG) 1881da177e4SLinus Torvalds goto out; 1891da177e4SLinus Torvalds *op = '?'; 1901da177e4SLinus Torvalds res = 1; 1911da177e4SLinus Torvalds } 1921da177e4SLinus Torvalds op += res; 1931da177e4SLinus Torvalds len -= res; 1941da177e4SLinus Torvalds c0 = c1; 1951da177e4SLinus Torvalds ip++; 1961da177e4SLinus Torvalds ustrlen--; 1971da177e4SLinus Torvalds } 1981da177e4SLinus Torvalds ce2 = hfsplus_compose_lookup(ce1, c0); 1991da177e4SLinus Torvalds if (ce2) { 2001da177e4SLinus Torvalds i = 1; 2011da177e4SLinus Torvalds while (i < ustrlen) { 2021da177e4SLinus Torvalds ce1 = hfsplus_compose_lookup(ce2, be16_to_cpu(ip[i])); 2031da177e4SLinus Torvalds if (!ce1) 2041da177e4SLinus Torvalds break; 2051da177e4SLinus Torvalds i++; 2061da177e4SLinus Torvalds ce2 = ce1; 2071da177e4SLinus Torvalds } 2081da177e4SLinus Torvalds if ((cc = ce2[0])) { 2091da177e4SLinus Torvalds ip += i; 2101da177e4SLinus Torvalds ustrlen -= i; 2111da177e4SLinus Torvalds goto done; 2121da177e4SLinus Torvalds } 2131da177e4SLinus Torvalds } 2141da177e4SLinus Torvalds same: 2151da177e4SLinus Torvalds switch (c0) { 2161da177e4SLinus Torvalds case 0: 2171da177e4SLinus Torvalds cc = 0x2400; 2181da177e4SLinus Torvalds break; 2191da177e4SLinus Torvalds case '/': 2201da177e4SLinus Torvalds cc = ':'; 2211da177e4SLinus Torvalds break; 2221da177e4SLinus Torvalds default: 2231da177e4SLinus Torvalds cc = c0; 2241da177e4SLinus Torvalds } 2251da177e4SLinus Torvalds done: 2261da177e4SLinus Torvalds res = nls->uni2char(cc, op, len); 2271da177e4SLinus Torvalds if (res < 0) { 2281da177e4SLinus Torvalds if (res == -ENAMETOOLONG) 2291da177e4SLinus Torvalds goto out; 2301da177e4SLinus Torvalds *op = '?'; 2311da177e4SLinus Torvalds res = 1; 2321da177e4SLinus Torvalds } 2331da177e4SLinus Torvalds op += res; 2341da177e4SLinus Torvalds len -= res; 2351da177e4SLinus Torvalds } 2361da177e4SLinus Torvalds res = 0; 2371da177e4SLinus Torvalds out: 2381da177e4SLinus Torvalds *len_p = (char *)op - astr; 2391da177e4SLinus Torvalds return res; 2401da177e4SLinus Torvalds } 2411da177e4SLinus Torvalds 2421da177e4SLinus Torvalds int hfsplus_asc2uni(struct super_block *sb, struct hfsplus_unistr *ustr, const char *astr, int len) 2431da177e4SLinus Torvalds { 2441da177e4SLinus Torvalds struct nls_table *nls = HFSPLUS_SB(sb).nls; 2451da177e4SLinus Torvalds int size, off, decompose; 2461da177e4SLinus Torvalds wchar_t c; 2471da177e4SLinus Torvalds u16 outlen = 0; 2481da177e4SLinus Torvalds 2491da177e4SLinus Torvalds decompose = !(HFSPLUS_SB(sb).flags & HFSPLUS_SB_NODECOMPOSE); 2501da177e4SLinus Torvalds 2511da177e4SLinus Torvalds while (outlen < HFSPLUS_MAX_STRLEN && len > 0) { 2521da177e4SLinus Torvalds size = nls->char2uni(astr, len, &c); 2531da177e4SLinus Torvalds if (size <= 0) { 2541da177e4SLinus Torvalds c = '?'; 2551da177e4SLinus Torvalds size = 1; 2561da177e4SLinus Torvalds } 2571da177e4SLinus Torvalds astr += size; 2581da177e4SLinus Torvalds len -= size; 2591da177e4SLinus Torvalds switch (c) { 2601da177e4SLinus Torvalds case 0x2400: 2611da177e4SLinus Torvalds c = 0; 2621da177e4SLinus Torvalds break; 2631da177e4SLinus Torvalds case ':': 2641da177e4SLinus Torvalds c = '/'; 2651da177e4SLinus Torvalds break; 2661da177e4SLinus Torvalds } 2671da177e4SLinus Torvalds if (c >= 0xc0 && decompose) { 2681da177e4SLinus Torvalds off = hfsplus_decompose_table[(c >> 12) & 0xf]; 2691da177e4SLinus Torvalds if (!off) 2701da177e4SLinus Torvalds goto done; 2711da177e4SLinus Torvalds if (off == 0xffff) { 2721da177e4SLinus Torvalds goto done; 2731da177e4SLinus Torvalds } 2741da177e4SLinus Torvalds off = hfsplus_decompose_table[off + ((c >> 8) & 0xf)]; 2751da177e4SLinus Torvalds if (!off) 2761da177e4SLinus Torvalds goto done; 2771da177e4SLinus Torvalds off = hfsplus_decompose_table[off + ((c >> 4) & 0xf)]; 2781da177e4SLinus Torvalds if (!off) 2791da177e4SLinus Torvalds goto done; 2801da177e4SLinus Torvalds off = hfsplus_decompose_table[off + (c & 0xf)]; 2811da177e4SLinus Torvalds size = off & 3; 2821da177e4SLinus Torvalds if (!size) 2831da177e4SLinus Torvalds goto done; 2841da177e4SLinus Torvalds off /= 4; 2851da177e4SLinus Torvalds if (outlen + size > HFSPLUS_MAX_STRLEN) 2861da177e4SLinus Torvalds break; 2871da177e4SLinus Torvalds do { 2881da177e4SLinus Torvalds ustr->unicode[outlen++] = cpu_to_be16(hfsplus_decompose_table[off++]); 2891da177e4SLinus Torvalds } while (--size > 0); 2901da177e4SLinus Torvalds continue; 2911da177e4SLinus Torvalds } 2921da177e4SLinus Torvalds done: 2931da177e4SLinus Torvalds ustr->unicode[outlen++] = cpu_to_be16(c); 2941da177e4SLinus Torvalds } 2951da177e4SLinus Torvalds ustr->length = cpu_to_be16(outlen); 2961da177e4SLinus Torvalds if (len > 0) 2971da177e4SLinus Torvalds return -ENAMETOOLONG; 2981da177e4SLinus Torvalds return 0; 2991da177e4SLinus Torvalds } 300