xref: /openbmc/linux/fs/hfsplus/unicode.c (revision 92ed1a76)
1 /*
2  *  linux/fs/hfsplus/unicode.c
3  *
4  * Copyright (C) 2001
5  * Brad Boyer (flar@allandria.com)
6  * (C) 2003 Ardis Technologies <roman@ardistech.com>
7  *
8  * Handler routines for unicode strings
9  */
10 
11 #include <linux/types.h>
12 #include <linux/nls.h>
13 #include "hfsplus_fs.h"
14 #include "hfsplus_raw.h"
15 
16 /* Fold the case of a unicode char, given the 16 bit value */
17 /* Returns folded char, or 0 if ignorable */
18 static inline u16 case_fold(u16 c)
19 {
20 	u16 tmp;
21 
22 	tmp = hfsplus_case_fold_table[c >> 8];
23 	if (tmp)
24 		tmp = hfsplus_case_fold_table[tmp + (c & 0xff)];
25 	else
26 		tmp = c;
27 	return tmp;
28 }
29 
30 /* Compare unicode strings, return values like normal strcmp */
31 int hfsplus_strcasecmp(const struct hfsplus_unistr *s1,
32 		       const struct hfsplus_unistr *s2)
33 {
34 	u16 len1, len2, c1, c2;
35 	const hfsplus_unichr *p1, *p2;
36 
37 	len1 = be16_to_cpu(s1->length);
38 	len2 = be16_to_cpu(s2->length);
39 	p1 = s1->unicode;
40 	p2 = s2->unicode;
41 
42 	while (1) {
43 		c1 = c2 = 0;
44 
45 		while (len1 && !c1) {
46 			c1 = case_fold(be16_to_cpu(*p1));
47 			p1++;
48 			len1--;
49 		}
50 		while (len2 && !c2) {
51 			c2 = case_fold(be16_to_cpu(*p2));
52 			p2++;
53 			len2--;
54 		}
55 
56 		if (c1 != c2)
57 			return (c1 < c2) ? -1 : 1;
58 		if (!c1 && !c2)
59 			return 0;
60 	}
61 }
62 
63 /* Compare names as a sequence of 16-bit unsigned integers */
64 int hfsplus_strcmp(const struct hfsplus_unistr *s1,
65 		   const struct hfsplus_unistr *s2)
66 {
67 	u16 len1, len2, c1, c2;
68 	const hfsplus_unichr *p1, *p2;
69 	int len;
70 
71 	len1 = be16_to_cpu(s1->length);
72 	len2 = be16_to_cpu(s2->length);
73 	p1 = s1->unicode;
74 	p2 = s2->unicode;
75 
76 	for (len = min(len1, len2); len > 0; len--) {
77 		c1 = be16_to_cpu(*p1);
78 		c2 = be16_to_cpu(*p2);
79 		if (c1 != c2)
80 			return c1 < c2 ? -1 : 1;
81 		p1++;
82 		p2++;
83 	}
84 
85 	return len1 < len2 ? -1 :
86 	       len1 > len2 ? 1 : 0;
87 }
88 
89 
/*
 * Constants for arithmetic Hangul syllable composition: the *Base values
 * are the first code points of the precomposed syllables (S) and of the
 * leading (L), vowel (V) and trailing (T) jamo ranges; the *Count values
 * are the sizes of those ranges.  Note that Hangul_TBase is one below the
 * first trailing jamo, so a trailing index of 0 means "no trailing jamo".
 */
#define Hangul_SBase	0xac00
#define Hangul_LBase	0x1100
#define Hangul_VBase	0x1161
#define Hangul_TBase	0x11a7

#define Hangul_LCount	19
#define Hangul_VCount	21
#define Hangul_TCount	28
/* syllables per leading jamo: 21 * 28 = 588 */
#define Hangul_NCount	(Hangul_VCount * Hangul_TCount)
/* total precomposed syllables: 19 * 588 = 11172 */
#define Hangul_SCount	(Hangul_LCount * Hangul_NCount)
99 
100 
101 static u16 *hfsplus_compose_lookup(u16 *p, u16 cc)
102 {
103 	int i, s, e;
104 
105 	s = 1;
106 	e = p[1];
107 	if (!e || cc < p[s * 2] || cc > p[e * 2])
108 		return NULL;
109 	do {
110 		i = (s + e) / 2;
111 		if (cc > p[i * 2])
112 			s = i + 1;
113 		else if (cc < p[i * 2])
114 			e = i - 1;
115 		else
116 			return hfsplus_compose_table + p[i * 2 + 1];
117 	} while (s <= e);
118 	return NULL;
119 }
120 
121 int hfsplus_uni2asc(struct super_block *sb,
122 		const struct hfsplus_unistr *ustr,
123 		char *astr, int *len_p)
124 {
125 	const hfsplus_unichr *ip;
126 	struct nls_table *nls = HFSPLUS_SB(sb)->nls;
127 	u8 *op;
128 	u16 cc, c0, c1;
129 	u16 *ce1, *ce2;
130 	int i, len, ustrlen, res, compose;
131 
132 	op = astr;
133 	ip = ustr->unicode;
134 	ustrlen = be16_to_cpu(ustr->length);
135 	len = *len_p;
136 	ce1 = NULL;
137 	compose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
138 
139 	while (ustrlen > 0) {
140 		c0 = be16_to_cpu(*ip++);
141 		ustrlen--;
142 		/* search for single decomposed char */
143 		if (likely(compose))
144 			ce1 = hfsplus_compose_lookup(hfsplus_compose_table, c0);
145 		if (ce1 && (cc = ce1[0])) {
146 			/* start of a possibly decomposed Hangul char */
147 			if (cc != 0xffff)
148 				goto done;
149 			if (!ustrlen)
150 				goto same;
151 			c1 = be16_to_cpu(*ip) - Hangul_VBase;
152 			if (c1 < Hangul_VCount) {
153 				/* compose the Hangul char */
154 				cc = (c0 - Hangul_LBase) * Hangul_VCount;
155 				cc = (cc + c1) * Hangul_TCount;
156 				cc += Hangul_SBase;
157 				ip++;
158 				ustrlen--;
159 				if (!ustrlen)
160 					goto done;
161 				c1 = be16_to_cpu(*ip) - Hangul_TBase;
162 				if (c1 > 0 && c1 < Hangul_TCount) {
163 					cc += c1;
164 					ip++;
165 					ustrlen--;
166 				}
167 				goto done;
168 			}
169 		}
170 		while (1) {
171 			/* main loop for common case of not composed chars */
172 			if (!ustrlen)
173 				goto same;
174 			c1 = be16_to_cpu(*ip);
175 			if (likely(compose))
176 				ce1 = hfsplus_compose_lookup(
177 					hfsplus_compose_table, c1);
178 			if (ce1)
179 				break;
180 			switch (c0) {
181 			case 0:
182 				c0 = 0x2400;
183 				break;
184 			case '/':
185 				c0 = ':';
186 				break;
187 			}
188 			res = nls->uni2char(c0, op, len);
189 			if (res < 0) {
190 				if (res == -ENAMETOOLONG)
191 					goto out;
192 				*op = '?';
193 				res = 1;
194 			}
195 			op += res;
196 			len -= res;
197 			c0 = c1;
198 			ip++;
199 			ustrlen--;
200 		}
201 		ce2 = hfsplus_compose_lookup(ce1, c0);
202 		if (ce2) {
203 			i = 1;
204 			while (i < ustrlen) {
205 				ce1 = hfsplus_compose_lookup(ce2,
206 					be16_to_cpu(ip[i]));
207 				if (!ce1)
208 					break;
209 				i++;
210 				ce2 = ce1;
211 			}
212 			if ((cc = ce2[0])) {
213 				ip += i;
214 				ustrlen -= i;
215 				goto done;
216 			}
217 		}
218 same:
219 		switch (c0) {
220 		case 0:
221 			cc = 0x2400;
222 			break;
223 		case '/':
224 			cc = ':';
225 			break;
226 		default:
227 			cc = c0;
228 		}
229 done:
230 		res = nls->uni2char(cc, op, len);
231 		if (res < 0) {
232 			if (res == -ENAMETOOLONG)
233 				goto out;
234 			*op = '?';
235 			res = 1;
236 		}
237 		op += res;
238 		len -= res;
239 	}
240 	res = 0;
241 out:
242 	*len_p = (char *)op - astr;
243 	return res;
244 }
245 
246 /*
247  * Convert one or more ASCII characters into a single unicode character.
248  * Returns the number of ASCII characters corresponding to the unicode char.
249  */
250 static inline int asc2unichar(struct super_block *sb, const char *astr, int len,
251 			      wchar_t *uc)
252 {
253 	int size = HFSPLUS_SB(sb)->nls->char2uni(astr, len, uc);
254 	if (size <= 0) {
255 		*uc = '?';
256 		size = 1;
257 	}
258 	switch (*uc) {
259 	case 0x2400:
260 		*uc = 0;
261 		break;
262 	case ':':
263 		*uc = '/';
264 		break;
265 	}
266 	return size;
267 }
268 
269 /* Decomposes a single unicode character. */
270 static inline u16 *decompose_unichar(wchar_t uc, int *size)
271 {
272 	int off;
273 
274 	off = hfsplus_decompose_table[(uc >> 12) & 0xf];
275 	if (off == 0 || off == 0xffff)
276 		return NULL;
277 
278 	off = hfsplus_decompose_table[off + ((uc >> 8) & 0xf)];
279 	if (!off)
280 		return NULL;
281 
282 	off = hfsplus_decompose_table[off + ((uc >> 4) & 0xf)];
283 	if (!off)
284 		return NULL;
285 
286 	off = hfsplus_decompose_table[off + (uc & 0xf)];
287 	*size = off & 3;
288 	if (*size == 0)
289 		return NULL;
290 	return hfsplus_decompose_table + (off / 4);
291 }
292 
293 int hfsplus_asc2uni(struct super_block *sb, struct hfsplus_unistr *ustr,
294 		    const char *astr, int len)
295 {
296 	int size, dsize, decompose;
297 	u16 *dstr, outlen = 0;
298 	wchar_t c;
299 
300 	decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
301 	while (outlen < HFSPLUS_MAX_STRLEN && len > 0) {
302 		size = asc2unichar(sb, astr, len, &c);
303 
304 		if (decompose && (dstr = decompose_unichar(c, &dsize))) {
305 			if (outlen + dsize > HFSPLUS_MAX_STRLEN)
306 				break;
307 			do {
308 				ustr->unicode[outlen++] = cpu_to_be16(*dstr++);
309 			} while (--dsize > 0);
310 		} else
311 			ustr->unicode[outlen++] = cpu_to_be16(c);
312 
313 		astr += size;
314 		len -= size;
315 	}
316 	ustr->length = cpu_to_be16(outlen);
317 	if (len > 0)
318 		return -ENAMETOOLONG;
319 	return 0;
320 }
321 
322 /*
323  * Hash a string to an integer as appropriate for the HFS+ filesystem.
324  * Composed unicode characters are decomposed and case-folding is performed
325  * if the appropriate bits are (un)set on the superblock.
326  */
327 int hfsplus_hash_dentry(const struct dentry *dentry, const struct inode *inode,
328 		struct qstr *str)
329 {
330 	struct super_block *sb = dentry->d_sb;
331 	const char *astr;
332 	const u16 *dstr;
333 	int casefold, decompose, size, len;
334 	unsigned long hash;
335 	wchar_t c;
336 	u16 c2;
337 
338 	casefold = test_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags);
339 	decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
340 	hash = init_name_hash();
341 	astr = str->name;
342 	len = str->len;
343 	while (len > 0) {
344 		int uninitialized_var(dsize);
345 		size = asc2unichar(sb, astr, len, &c);
346 		astr += size;
347 		len -= size;
348 
349 		if (decompose && (dstr = decompose_unichar(c, &dsize))) {
350 			do {
351 				c2 = *dstr++;
352 				if (!casefold || (c2 = case_fold(c2)))
353 					hash = partial_name_hash(c2, hash);
354 			} while (--dsize > 0);
355 		} else {
356 			c2 = c;
357 			if (!casefold || (c2 = case_fold(c2)))
358 				hash = partial_name_hash(c2, hash);
359 		}
360 	}
361 	str->hash = end_name_hash(hash);
362 
363 	return 0;
364 }
365 
366 /*
367  * Compare strings with HFS+ filename ordering.
368  * Composed unicode characters are decomposed and case-folding is performed
369  * if the appropriate bits are (un)set on the superblock.
370  */
371 int hfsplus_compare_dentry(const struct dentry *parent,
372 		const struct inode *pinode,
373 		const struct dentry *dentry, const struct inode *inode,
374 		unsigned int len, const char *str, const struct qstr *name)
375 {
376 	struct super_block *sb = parent->d_sb;
377 	int casefold, decompose, size;
378 	int dsize1, dsize2, len1, len2;
379 	const u16 *dstr1, *dstr2;
380 	const char *astr1, *astr2;
381 	u16 c1, c2;
382 	wchar_t c;
383 
384 	casefold = test_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags);
385 	decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
386 	astr1 = str;
387 	len1 = len;
388 	astr2 = name->name;
389 	len2 = name->len;
390 	dsize1 = dsize2 = 0;
391 	dstr1 = dstr2 = NULL;
392 
393 	while (len1 > 0 && len2 > 0) {
394 		if (!dsize1) {
395 			size = asc2unichar(sb, astr1, len1, &c);
396 			astr1 += size;
397 			len1 -= size;
398 
399 			if (decompose)
400 				dstr1 = decompose_unichar(c, &dsize1);
401 			if (!decompose || !dstr1) {
402 				c1 = c;
403 				dstr1 = &c1;
404 				dsize1 = 1;
405 			}
406 		}
407 
408 		if (!dsize2) {
409 			size = asc2unichar(sb, astr2, len2, &c);
410 			astr2 += size;
411 			len2 -= size;
412 
413 			if (decompose)
414 				dstr2 = decompose_unichar(c, &dsize2);
415 			if (!decompose || !dstr2) {
416 				c2 = c;
417 				dstr2 = &c2;
418 				dsize2 = 1;
419 			}
420 		}
421 
422 		c1 = *dstr1;
423 		c2 = *dstr2;
424 		if (casefold) {
425 			if  (!(c1 = case_fold(c1))) {
426 				dstr1++;
427 				dsize1--;
428 				continue;
429 			}
430 			if (!(c2 = case_fold(c2))) {
431 				dstr2++;
432 				dsize2--;
433 				continue;
434 			}
435 		}
436 		if (c1 < c2)
437 			return -1;
438 		else if (c1 > c2)
439 			return 1;
440 
441 		dstr1++;
442 		dsize1--;
443 		dstr2++;
444 		dsize2--;
445 	}
446 
447 	if (len1 < len2)
448 		return -1;
449 	if (len1 > len2)
450 		return 1;
451 	return 0;
452 }
453