xref: /openbmc/linux/fs/hfsplus/unicode.c (revision 2179d372)
/*
 *  linux/fs/hfsplus/unicode.c
 *
 * Copyright (C) 2001
 * Brad Boyer (flar@allandria.com)
 * (C) 2003 Ardis Technologies <roman@ardistech.com>
 *
 * Handler routines for unicode strings
 */
101da177e4SLinus Torvalds 
111da177e4SLinus Torvalds #include <linux/types.h>
121da177e4SLinus Torvalds #include <linux/nls.h>
131da177e4SLinus Torvalds #include "hfsplus_fs.h"
141da177e4SLinus Torvalds #include "hfsplus_raw.h"
151da177e4SLinus Torvalds 
161da177e4SLinus Torvalds /* Fold the case of a unicode char, given the 16 bit value */
171da177e4SLinus Torvalds /* Returns folded char, or 0 if ignorable */
181da177e4SLinus Torvalds static inline u16 case_fold(u16 c)
191da177e4SLinus Torvalds {
201da177e4SLinus Torvalds         u16 tmp;
211da177e4SLinus Torvalds 
221da177e4SLinus Torvalds         tmp = hfsplus_case_fold_table[c >> 8];
231da177e4SLinus Torvalds         if (tmp)
241da177e4SLinus Torvalds                 tmp = hfsplus_case_fold_table[tmp + (c & 0xff)];
251da177e4SLinus Torvalds         else
261da177e4SLinus Torvalds                 tmp = c;
271da177e4SLinus Torvalds         return tmp;
281da177e4SLinus Torvalds }
291da177e4SLinus Torvalds 
301da177e4SLinus Torvalds /* Compare unicode strings, return values like normal strcmp */
312179d372SDavid Elliott int hfsplus_strcasecmp(const struct hfsplus_unistr *s1,
322179d372SDavid Elliott 		       const struct hfsplus_unistr *s2)
331da177e4SLinus Torvalds {
341da177e4SLinus Torvalds 	u16 len1, len2, c1, c2;
351da177e4SLinus Torvalds 	const hfsplus_unichr *p1, *p2;
361da177e4SLinus Torvalds 
371da177e4SLinus Torvalds 	len1 = be16_to_cpu(s1->length);
381da177e4SLinus Torvalds 	len2 = be16_to_cpu(s2->length);
391da177e4SLinus Torvalds 	p1 = s1->unicode;
401da177e4SLinus Torvalds 	p2 = s2->unicode;
411da177e4SLinus Torvalds 
421da177e4SLinus Torvalds 	while (1) {
431da177e4SLinus Torvalds 		c1 = c2 = 0;
441da177e4SLinus Torvalds 
451da177e4SLinus Torvalds 		while (len1 && !c1) {
461da177e4SLinus Torvalds 			c1 = case_fold(be16_to_cpu(*p1));
471da177e4SLinus Torvalds 			p1++;
481da177e4SLinus Torvalds 			len1--;
491da177e4SLinus Torvalds 		}
501da177e4SLinus Torvalds 		while (len2 && !c2) {
511da177e4SLinus Torvalds 			c2 = case_fold(be16_to_cpu(*p2));
521da177e4SLinus Torvalds 			p2++;
531da177e4SLinus Torvalds 			len2--;
541da177e4SLinus Torvalds 		}
551da177e4SLinus Torvalds 
561da177e4SLinus Torvalds 		if (c1 != c2)
571da177e4SLinus Torvalds 			return (c1 < c2) ? -1 : 1;
581da177e4SLinus Torvalds 		if (!c1 && !c2)
591da177e4SLinus Torvalds 			return 0;
601da177e4SLinus Torvalds 	}
611da177e4SLinus Torvalds }
621da177e4SLinus Torvalds 
632179d372SDavid Elliott /* Compare names as a sequence of 16-bit unsigned integers */
642179d372SDavid Elliott int hfsplus_strcmp(const struct hfsplus_unistr *s1,
652179d372SDavid Elliott 		   const struct hfsplus_unistr *s2)
662179d372SDavid Elliott {
672179d372SDavid Elliott 	u16 len1, len2, c1, c2;
682179d372SDavid Elliott 	const hfsplus_unichr *p1, *p2;
692179d372SDavid Elliott 	int len;
702179d372SDavid Elliott 
712179d372SDavid Elliott 	len1 = be16_to_cpu(s1->length);
722179d372SDavid Elliott 	len2 = be16_to_cpu(s2->length);
732179d372SDavid Elliott 	p1 = s1->unicode;
742179d372SDavid Elliott 	p2 = s2->unicode;
752179d372SDavid Elliott 
762179d372SDavid Elliott 	for (len = min(len1, len2); len > 0; len--) {
772179d372SDavid Elliott 		c1 = be16_to_cpu(*p1);
782179d372SDavid Elliott 		c2 = be16_to_cpu(*p2);
792179d372SDavid Elliott 		if (c1 != c2)
802179d372SDavid Elliott 			return c1 < c2 ? -1 : 1;
812179d372SDavid Elliott 		p1++;
822179d372SDavid Elliott 		p2++;
832179d372SDavid Elliott 	}
842179d372SDavid Elliott 
852179d372SDavid Elliott 	return len1 < len2 ? -1 :
862179d372SDavid Elliott 	       len1 > len2 ? 1 : 0;
872179d372SDavid Elliott }
882179d372SDavid Elliott 
892179d372SDavid Elliott 
/*
 * Constants for the arithmetic composition of Hangul syllables from
 * conjoining jamo (L = leading consonant, V = vowel, T = trailing
 * consonant, S = precomposed syllable).
 */
#define Hangul_SBase	0xac00	/* first precomposed syllable */
#define Hangul_LBase	0x1100	/* first leading consonant */
#define Hangul_VBase	0x1161	/* first vowel */
#define Hangul_TBase	0x11a7	/* one below the first trailing consonant */
#define Hangul_LCount	19
#define Hangul_VCount	21
#define Hangul_TCount	28	/* includes the "no trailing consonant" slot */
#define Hangul_NCount	(Hangul_VCount * Hangul_TCount)
#define Hangul_SCount	(Hangul_LCount * Hangul_NCount)	/* 11172 */
991da177e4SLinus Torvalds 
1001da177e4SLinus Torvalds 
1011da177e4SLinus Torvalds static u16 *hfsplus_compose_lookup(u16 *p, u16 cc)
1021da177e4SLinus Torvalds {
1031da177e4SLinus Torvalds 	int i, s, e;
1041da177e4SLinus Torvalds 
1051da177e4SLinus Torvalds 	s = 1;
1061da177e4SLinus Torvalds 	e = p[1];
1071da177e4SLinus Torvalds 	if (!e || cc < p[s * 2] || cc > p[e * 2])
1081da177e4SLinus Torvalds 		return NULL;
1091da177e4SLinus Torvalds 	do {
1101da177e4SLinus Torvalds 		i = (s + e) / 2;
1111da177e4SLinus Torvalds 		if (cc > p[i * 2])
1121da177e4SLinus Torvalds 			s = i + 1;
1131da177e4SLinus Torvalds 		else if (cc < p[i * 2])
1141da177e4SLinus Torvalds 			e = i - 1;
1151da177e4SLinus Torvalds 		else
1161da177e4SLinus Torvalds 			return hfsplus_compose_table + p[i * 2 + 1];
1171da177e4SLinus Torvalds 	} while (s <= e);
1181da177e4SLinus Torvalds 	return NULL;
1191da177e4SLinus Torvalds }
1201da177e4SLinus Torvalds 
1211da177e4SLinus Torvalds int hfsplus_uni2asc(struct super_block *sb, const struct hfsplus_unistr *ustr, char *astr, int *len_p)
1221da177e4SLinus Torvalds {
1231da177e4SLinus Torvalds 	const hfsplus_unichr *ip;
1241da177e4SLinus Torvalds 	struct nls_table *nls = HFSPLUS_SB(sb).nls;
1251da177e4SLinus Torvalds 	u8 *op;
1261da177e4SLinus Torvalds 	u16 cc, c0, c1;
1271da177e4SLinus Torvalds 	u16 *ce1, *ce2;
1281da177e4SLinus Torvalds 	int i, len, ustrlen, res, compose;
1291da177e4SLinus Torvalds 
1301da177e4SLinus Torvalds 	op = astr;
1311da177e4SLinus Torvalds 	ip = ustr->unicode;
1321da177e4SLinus Torvalds 	ustrlen = be16_to_cpu(ustr->length);
1331da177e4SLinus Torvalds 	len = *len_p;
1341da177e4SLinus Torvalds 	ce1 = NULL;
1351da177e4SLinus Torvalds 	compose = !(HFSPLUS_SB(sb).flags & HFSPLUS_SB_NODECOMPOSE);
1361da177e4SLinus Torvalds 
1371da177e4SLinus Torvalds 	while (ustrlen > 0) {
1381da177e4SLinus Torvalds 		c0 = be16_to_cpu(*ip++);
1391da177e4SLinus Torvalds 		ustrlen--;
1401da177e4SLinus Torvalds 		/* search for single decomposed char */
1411da177e4SLinus Torvalds 		if (likely(compose))
1421da177e4SLinus Torvalds 			ce1 = hfsplus_compose_lookup(hfsplus_compose_table, c0);
1431da177e4SLinus Torvalds 		if (ce1 && (cc = ce1[0])) {
1441da177e4SLinus Torvalds 			/* start of a possibly decomposed Hangul char */
1451da177e4SLinus Torvalds 			if (cc != 0xffff)
1461da177e4SLinus Torvalds 				goto done;
1471da177e4SLinus Torvalds 			if (!ustrlen)
1481da177e4SLinus Torvalds 				goto same;
1491da177e4SLinus Torvalds 			c1 = be16_to_cpu(*ip) - Hangul_VBase;
1501da177e4SLinus Torvalds 			if (c1 < Hangul_VCount) {
1511da177e4SLinus Torvalds 				/* compose the Hangul char */
1521da177e4SLinus Torvalds 				cc = (c0 - Hangul_LBase) * Hangul_VCount;
1531da177e4SLinus Torvalds 				cc = (cc + c1) * Hangul_TCount;
1541da177e4SLinus Torvalds 				cc += Hangul_SBase;
1551da177e4SLinus Torvalds 				ip++;
1561da177e4SLinus Torvalds 				ustrlen--;
1571da177e4SLinus Torvalds 				if (!ustrlen)
1581da177e4SLinus Torvalds 					goto done;
1591da177e4SLinus Torvalds 				c1 = be16_to_cpu(*ip) - Hangul_TBase;
1601da177e4SLinus Torvalds 				if (c1 > 0 && c1 < Hangul_TCount) {
1611da177e4SLinus Torvalds 					cc += c1;
1621da177e4SLinus Torvalds 					ip++;
1631da177e4SLinus Torvalds 					ustrlen--;
1641da177e4SLinus Torvalds 				}
1651da177e4SLinus Torvalds 				goto done;
1661da177e4SLinus Torvalds 			}
1671da177e4SLinus Torvalds 		}
1681da177e4SLinus Torvalds 		while (1) {
1691da177e4SLinus Torvalds 			/* main loop for common case of not composed chars */
1701da177e4SLinus Torvalds 			if (!ustrlen)
1711da177e4SLinus Torvalds 				goto same;
1721da177e4SLinus Torvalds 			c1 = be16_to_cpu(*ip);
1731da177e4SLinus Torvalds 			if (likely(compose))
1741da177e4SLinus Torvalds 				ce1 = hfsplus_compose_lookup(hfsplus_compose_table, c1);
1751da177e4SLinus Torvalds 			if (ce1)
1761da177e4SLinus Torvalds 				break;
1771da177e4SLinus Torvalds 			switch (c0) {
1781da177e4SLinus Torvalds 			case 0:
1791da177e4SLinus Torvalds 				c0 = 0x2400;
1801da177e4SLinus Torvalds 				break;
1811da177e4SLinus Torvalds 			case '/':
1821da177e4SLinus Torvalds 				c0 = ':';
1831da177e4SLinus Torvalds 				break;
1841da177e4SLinus Torvalds 			}
1851da177e4SLinus Torvalds 			res = nls->uni2char(c0, op, len);
1861da177e4SLinus Torvalds 			if (res < 0) {
1871da177e4SLinus Torvalds 				if (res == -ENAMETOOLONG)
1881da177e4SLinus Torvalds 					goto out;
1891da177e4SLinus Torvalds 				*op = '?';
1901da177e4SLinus Torvalds 				res = 1;
1911da177e4SLinus Torvalds 			}
1921da177e4SLinus Torvalds 			op += res;
1931da177e4SLinus Torvalds 			len -= res;
1941da177e4SLinus Torvalds 			c0 = c1;
1951da177e4SLinus Torvalds 			ip++;
1961da177e4SLinus Torvalds 			ustrlen--;
1971da177e4SLinus Torvalds 		}
1981da177e4SLinus Torvalds 		ce2 = hfsplus_compose_lookup(ce1, c0);
1991da177e4SLinus Torvalds 		if (ce2) {
2001da177e4SLinus Torvalds 			i = 1;
2011da177e4SLinus Torvalds 			while (i < ustrlen) {
2021da177e4SLinus Torvalds 				ce1 = hfsplus_compose_lookup(ce2, be16_to_cpu(ip[i]));
2031da177e4SLinus Torvalds 				if (!ce1)
2041da177e4SLinus Torvalds 					break;
2051da177e4SLinus Torvalds 				i++;
2061da177e4SLinus Torvalds 				ce2 = ce1;
2071da177e4SLinus Torvalds 			}
2081da177e4SLinus Torvalds 			if ((cc = ce2[0])) {
2091da177e4SLinus Torvalds 				ip += i;
2101da177e4SLinus Torvalds 				ustrlen -= i;
2111da177e4SLinus Torvalds 				goto done;
2121da177e4SLinus Torvalds 			}
2131da177e4SLinus Torvalds 		}
2141da177e4SLinus Torvalds 	same:
2151da177e4SLinus Torvalds 		switch (c0) {
2161da177e4SLinus Torvalds 		case 0:
2171da177e4SLinus Torvalds 			cc = 0x2400;
2181da177e4SLinus Torvalds 			break;
2191da177e4SLinus Torvalds 		case '/':
2201da177e4SLinus Torvalds 			cc = ':';
2211da177e4SLinus Torvalds 			break;
2221da177e4SLinus Torvalds 		default:
2231da177e4SLinus Torvalds 			cc = c0;
2241da177e4SLinus Torvalds 		}
2251da177e4SLinus Torvalds 	done:
2261da177e4SLinus Torvalds 		res = nls->uni2char(cc, op, len);
2271da177e4SLinus Torvalds 		if (res < 0) {
2281da177e4SLinus Torvalds 			if (res == -ENAMETOOLONG)
2291da177e4SLinus Torvalds 				goto out;
2301da177e4SLinus Torvalds 			*op = '?';
2311da177e4SLinus Torvalds 			res = 1;
2321da177e4SLinus Torvalds 		}
2331da177e4SLinus Torvalds 		op += res;
2341da177e4SLinus Torvalds 		len -= res;
2351da177e4SLinus Torvalds 	}
2361da177e4SLinus Torvalds 	res = 0;
2371da177e4SLinus Torvalds out:
2381da177e4SLinus Torvalds 	*len_p = (char *)op - astr;
2391da177e4SLinus Torvalds 	return res;
2401da177e4SLinus Torvalds }
2411da177e4SLinus Torvalds 
2421da177e4SLinus Torvalds int hfsplus_asc2uni(struct super_block *sb, struct hfsplus_unistr *ustr, const char *astr, int len)
2431da177e4SLinus Torvalds {
2441da177e4SLinus Torvalds 	struct nls_table *nls = HFSPLUS_SB(sb).nls;
2451da177e4SLinus Torvalds 	int size, off, decompose;
2461da177e4SLinus Torvalds 	wchar_t c;
2471da177e4SLinus Torvalds 	u16 outlen = 0;
2481da177e4SLinus Torvalds 
2491da177e4SLinus Torvalds 	decompose = !(HFSPLUS_SB(sb).flags & HFSPLUS_SB_NODECOMPOSE);
2501da177e4SLinus Torvalds 
2511da177e4SLinus Torvalds 	while (outlen < HFSPLUS_MAX_STRLEN && len > 0) {
2521da177e4SLinus Torvalds 		size = nls->char2uni(astr, len, &c);
2531da177e4SLinus Torvalds 		if (size <= 0) {
2541da177e4SLinus Torvalds 			c = '?';
2551da177e4SLinus Torvalds 			size = 1;
2561da177e4SLinus Torvalds 		}
2571da177e4SLinus Torvalds 		astr += size;
2581da177e4SLinus Torvalds 		len -= size;
2591da177e4SLinus Torvalds 		switch (c) {
2601da177e4SLinus Torvalds 		case 0x2400:
2611da177e4SLinus Torvalds 			c = 0;
2621da177e4SLinus Torvalds 			break;
2631da177e4SLinus Torvalds 		case ':':
2641da177e4SLinus Torvalds 			c = '/';
2651da177e4SLinus Torvalds 			break;
2661da177e4SLinus Torvalds 		}
2671da177e4SLinus Torvalds 		if (c >= 0xc0 && decompose) {
2681da177e4SLinus Torvalds 			off = hfsplus_decompose_table[(c >> 12) & 0xf];
2691da177e4SLinus Torvalds 			if (!off)
2701da177e4SLinus Torvalds 				goto done;
2711da177e4SLinus Torvalds 			if (off == 0xffff) {
2721da177e4SLinus Torvalds 				goto done;
2731da177e4SLinus Torvalds 			}
2741da177e4SLinus Torvalds 			off = hfsplus_decompose_table[off + ((c >> 8) & 0xf)];
2751da177e4SLinus Torvalds 			if (!off)
2761da177e4SLinus Torvalds 				goto done;
2771da177e4SLinus Torvalds 			off = hfsplus_decompose_table[off + ((c >> 4) & 0xf)];
2781da177e4SLinus Torvalds 			if (!off)
2791da177e4SLinus Torvalds 				goto done;
2801da177e4SLinus Torvalds 			off = hfsplus_decompose_table[off + (c & 0xf)];
2811da177e4SLinus Torvalds 			size = off & 3;
2821da177e4SLinus Torvalds 			if (!size)
2831da177e4SLinus Torvalds 				goto done;
2841da177e4SLinus Torvalds 			off /= 4;
2851da177e4SLinus Torvalds 			if (outlen + size > HFSPLUS_MAX_STRLEN)
2861da177e4SLinus Torvalds 				break;
2871da177e4SLinus Torvalds 			do {
2881da177e4SLinus Torvalds 				ustr->unicode[outlen++] = cpu_to_be16(hfsplus_decompose_table[off++]);
2891da177e4SLinus Torvalds 			} while (--size > 0);
2901da177e4SLinus Torvalds 			continue;
2911da177e4SLinus Torvalds 		}
2921da177e4SLinus Torvalds 	done:
2931da177e4SLinus Torvalds 		ustr->unicode[outlen++] = cpu_to_be16(c);
2941da177e4SLinus Torvalds 	}
2951da177e4SLinus Torvalds 	ustr->length = cpu_to_be16(outlen);
2961da177e4SLinus Torvalds 	if (len > 0)
2971da177e4SLinus Torvalds 		return -ENAMETOOLONG;
2981da177e4SLinus Torvalds 	return 0;
2991da177e4SLinus Torvalds }
300