xref: /openbmc/linux/fs/hfsplus/unicode.c (revision d78c317f)
1 /*
2  *  linux/fs/hfsplus/unicode.c
3  *
4  * Copyright (C) 2001
5  * Brad Boyer (flar@allandria.com)
6  * (C) 2003 Ardis Technologies <roman@ardistech.com>
7  *
8  * Handler routines for unicode strings
9  */
10 
11 #include <linux/types.h>
12 #include <linux/nls.h>
13 #include "hfsplus_fs.h"
14 #include "hfsplus_raw.h"
15 
16 /* Fold the case of a unicode char, given the 16 bit value */
17 /* Returns folded char, or 0 if ignorable */
18 static inline u16 case_fold(u16 c)
19 {
20 	u16 tmp;
21 
22 	tmp = hfsplus_case_fold_table[c >> 8];
23 	if (tmp)
24 		tmp = hfsplus_case_fold_table[tmp + (c & 0xff)];
25 	else
26 		tmp = c;
27 	return tmp;
28 }
29 
30 /* Compare unicode strings, return values like normal strcmp */
31 int hfsplus_strcasecmp(const struct hfsplus_unistr *s1,
32 		       const struct hfsplus_unistr *s2)
33 {
34 	u16 len1, len2, c1, c2;
35 	const hfsplus_unichr *p1, *p2;
36 
37 	len1 = be16_to_cpu(s1->length);
38 	len2 = be16_to_cpu(s2->length);
39 	p1 = s1->unicode;
40 	p2 = s2->unicode;
41 
42 	while (1) {
43 		c1 = c2 = 0;
44 
45 		while (len1 && !c1) {
46 			c1 = case_fold(be16_to_cpu(*p1));
47 			p1++;
48 			len1--;
49 		}
50 		while (len2 && !c2) {
51 			c2 = case_fold(be16_to_cpu(*p2));
52 			p2++;
53 			len2--;
54 		}
55 
56 		if (c1 != c2)
57 			return (c1 < c2) ? -1 : 1;
58 		if (!c1 && !c2)
59 			return 0;
60 	}
61 }
62 
63 /* Compare names as a sequence of 16-bit unsigned integers */
64 int hfsplus_strcmp(const struct hfsplus_unistr *s1,
65 		   const struct hfsplus_unistr *s2)
66 {
67 	u16 len1, len2, c1, c2;
68 	const hfsplus_unichr *p1, *p2;
69 	int len;
70 
71 	len1 = be16_to_cpu(s1->length);
72 	len2 = be16_to_cpu(s2->length);
73 	p1 = s1->unicode;
74 	p2 = s2->unicode;
75 
76 	for (len = min(len1, len2); len > 0; len--) {
77 		c1 = be16_to_cpu(*p1);
78 		c2 = be16_to_cpu(*p2);
79 		if (c1 != c2)
80 			return c1 < c2 ? -1 : 1;
81 		p1++;
82 		p2++;
83 	}
84 
85 	return len1 < len2 ? -1 :
86 	       len1 > len2 ? 1 : 0;
87 }
88 
89 
/*
 * Constants used to compose Hangul syllables arithmetically.
 * The L/V/T/S/N naming appears to follow the Hangul syllable composition
 * algorithm of the Unicode Standard.
 */

/* First code point of each range. */
#define Hangul_SBase	0xac00
#define Hangul_LBase	0x1100
#define Hangul_VBase	0x1161
#define Hangul_TBase	0x11a7

/* Number of code points in each range. */
#define Hangul_LCount	19
#define Hangul_VCount	21
#define Hangul_TCount	28
#define Hangul_NCount	(Hangul_VCount * Hangul_TCount)
#define Hangul_SCount	(Hangul_LCount * Hangul_NCount)	/* 11172 */
99 
100 
101 static u16 *hfsplus_compose_lookup(u16 *p, u16 cc)
102 {
103 	int i, s, e;
104 
105 	s = 1;
106 	e = p[1];
107 	if (!e || cc < p[s * 2] || cc > p[e * 2])
108 		return NULL;
109 	do {
110 		i = (s + e) / 2;
111 		if (cc > p[i * 2])
112 			s = i + 1;
113 		else if (cc < p[i * 2])
114 			e = i - 1;
115 		else
116 			return hfsplus_compose_table + p[i * 2 + 1];
117 	} while (s <= e);
118 	return NULL;
119 }
120 
121 int hfsplus_uni2asc(struct super_block *sb,
122 		const struct hfsplus_unistr *ustr,
123 		char *astr, int *len_p)
124 {
125 	const hfsplus_unichr *ip;
126 	struct nls_table *nls = HFSPLUS_SB(sb)->nls;
127 	u8 *op;
128 	u16 cc, c0, c1;
129 	u16 *ce1, *ce2;
130 	int i, len, ustrlen, res, compose;
131 
132 	op = astr;
133 	ip = ustr->unicode;
134 	ustrlen = be16_to_cpu(ustr->length);
135 	len = *len_p;
136 	ce1 = NULL;
137 	compose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
138 
139 	while (ustrlen > 0) {
140 		c0 = be16_to_cpu(*ip++);
141 		ustrlen--;
142 		/* search for single decomposed char */
143 		if (likely(compose))
144 			ce1 = hfsplus_compose_lookup(hfsplus_compose_table, c0);
145 		if (ce1)
146 			cc = ce1[0];
147 		else
148 			cc = 0;
149 		if (cc) {
150 			/* start of a possibly decomposed Hangul char */
151 			if (cc != 0xffff)
152 				goto done;
153 			if (!ustrlen)
154 				goto same;
155 			c1 = be16_to_cpu(*ip) - Hangul_VBase;
156 			if (c1 < Hangul_VCount) {
157 				/* compose the Hangul char */
158 				cc = (c0 - Hangul_LBase) * Hangul_VCount;
159 				cc = (cc + c1) * Hangul_TCount;
160 				cc += Hangul_SBase;
161 				ip++;
162 				ustrlen--;
163 				if (!ustrlen)
164 					goto done;
165 				c1 = be16_to_cpu(*ip) - Hangul_TBase;
166 				if (c1 > 0 && c1 < Hangul_TCount) {
167 					cc += c1;
168 					ip++;
169 					ustrlen--;
170 				}
171 				goto done;
172 			}
173 		}
174 		while (1) {
175 			/* main loop for common case of not composed chars */
176 			if (!ustrlen)
177 				goto same;
178 			c1 = be16_to_cpu(*ip);
179 			if (likely(compose))
180 				ce1 = hfsplus_compose_lookup(
181 					hfsplus_compose_table, c1);
182 			if (ce1)
183 				break;
184 			switch (c0) {
185 			case 0:
186 				c0 = 0x2400;
187 				break;
188 			case '/':
189 				c0 = ':';
190 				break;
191 			}
192 			res = nls->uni2char(c0, op, len);
193 			if (res < 0) {
194 				if (res == -ENAMETOOLONG)
195 					goto out;
196 				*op = '?';
197 				res = 1;
198 			}
199 			op += res;
200 			len -= res;
201 			c0 = c1;
202 			ip++;
203 			ustrlen--;
204 		}
205 		ce2 = hfsplus_compose_lookup(ce1, c0);
206 		if (ce2) {
207 			i = 1;
208 			while (i < ustrlen) {
209 				ce1 = hfsplus_compose_lookup(ce2,
210 					be16_to_cpu(ip[i]));
211 				if (!ce1)
212 					break;
213 				i++;
214 				ce2 = ce1;
215 			}
216 			cc = ce2[0];
217 			if (cc) {
218 				ip += i;
219 				ustrlen -= i;
220 				goto done;
221 			}
222 		}
223 same:
224 		switch (c0) {
225 		case 0:
226 			cc = 0x2400;
227 			break;
228 		case '/':
229 			cc = ':';
230 			break;
231 		default:
232 			cc = c0;
233 		}
234 done:
235 		res = nls->uni2char(cc, op, len);
236 		if (res < 0) {
237 			if (res == -ENAMETOOLONG)
238 				goto out;
239 			*op = '?';
240 			res = 1;
241 		}
242 		op += res;
243 		len -= res;
244 	}
245 	res = 0;
246 out:
247 	*len_p = (char *)op - astr;
248 	return res;
249 }
250 
251 /*
252  * Convert one or more ASCII characters into a single unicode character.
253  * Returns the number of ASCII characters corresponding to the unicode char.
254  */
255 static inline int asc2unichar(struct super_block *sb, const char *astr, int len,
256 			      wchar_t *uc)
257 {
258 	int size = HFSPLUS_SB(sb)->nls->char2uni(astr, len, uc);
259 	if (size <= 0) {
260 		*uc = '?';
261 		size = 1;
262 	}
263 	switch (*uc) {
264 	case 0x2400:
265 		*uc = 0;
266 		break;
267 	case ':':
268 		*uc = '/';
269 		break;
270 	}
271 	return size;
272 }
273 
274 /* Decomposes a single unicode character. */
275 static inline u16 *decompose_unichar(wchar_t uc, int *size)
276 {
277 	int off;
278 
279 	off = hfsplus_decompose_table[(uc >> 12) & 0xf];
280 	if (off == 0 || off == 0xffff)
281 		return NULL;
282 
283 	off = hfsplus_decompose_table[off + ((uc >> 8) & 0xf)];
284 	if (!off)
285 		return NULL;
286 
287 	off = hfsplus_decompose_table[off + ((uc >> 4) & 0xf)];
288 	if (!off)
289 		return NULL;
290 
291 	off = hfsplus_decompose_table[off + (uc & 0xf)];
292 	*size = off & 3;
293 	if (*size == 0)
294 		return NULL;
295 	return hfsplus_decompose_table + (off / 4);
296 }
297 
298 int hfsplus_asc2uni(struct super_block *sb, struct hfsplus_unistr *ustr,
299 		    const char *astr, int len)
300 {
301 	int size, dsize, decompose;
302 	u16 *dstr, outlen = 0;
303 	wchar_t c;
304 
305 	decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
306 	while (outlen < HFSPLUS_MAX_STRLEN && len > 0) {
307 		size = asc2unichar(sb, astr, len, &c);
308 
309 		if (decompose)
310 			dstr = decompose_unichar(c, &dsize);
311 		else
312 			dstr = NULL;
313 		if (dstr) {
314 			if (outlen + dsize > HFSPLUS_MAX_STRLEN)
315 				break;
316 			do {
317 				ustr->unicode[outlen++] = cpu_to_be16(*dstr++);
318 			} while (--dsize > 0);
319 		} else
320 			ustr->unicode[outlen++] = cpu_to_be16(c);
321 
322 		astr += size;
323 		len -= size;
324 	}
325 	ustr->length = cpu_to_be16(outlen);
326 	if (len > 0)
327 		return -ENAMETOOLONG;
328 	return 0;
329 }
330 
331 /*
332  * Hash a string to an integer as appropriate for the HFS+ filesystem.
333  * Composed unicode characters are decomposed and case-folding is performed
334  * if the appropriate bits are (un)set on the superblock.
335  */
336 int hfsplus_hash_dentry(const struct dentry *dentry, const struct inode *inode,
337 		struct qstr *str)
338 {
339 	struct super_block *sb = dentry->d_sb;
340 	const char *astr;
341 	const u16 *dstr;
342 	int casefold, decompose, size, len;
343 	unsigned long hash;
344 	wchar_t c;
345 	u16 c2;
346 
347 	casefold = test_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags);
348 	decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
349 	hash = init_name_hash();
350 	astr = str->name;
351 	len = str->len;
352 	while (len > 0) {
353 		int uninitialized_var(dsize);
354 		size = asc2unichar(sb, astr, len, &c);
355 		astr += size;
356 		len -= size;
357 
358 		if (decompose)
359 			dstr = decompose_unichar(c, &dsize);
360 		else
361 			dstr = NULL;
362 		if (dstr) {
363 			do {
364 				c2 = *dstr++;
365 				if (casefold)
366 					c2 = case_fold(c2);
367 				if (!casefold || c2)
368 					hash = partial_name_hash(c2, hash);
369 			} while (--dsize > 0);
370 		} else {
371 			c2 = c;
372 			if (casefold)
373 				c2 = case_fold(c2);
374 			if (!casefold || c2)
375 				hash = partial_name_hash(c2, hash);
376 		}
377 	}
378 	str->hash = end_name_hash(hash);
379 
380 	return 0;
381 }
382 
383 /*
384  * Compare strings with HFS+ filename ordering.
385  * Composed unicode characters are decomposed and case-folding is performed
386  * if the appropriate bits are (un)set on the superblock.
387  */
388 int hfsplus_compare_dentry(const struct dentry *parent,
389 		const struct inode *pinode,
390 		const struct dentry *dentry, const struct inode *inode,
391 		unsigned int len, const char *str, const struct qstr *name)
392 {
393 	struct super_block *sb = parent->d_sb;
394 	int casefold, decompose, size;
395 	int dsize1, dsize2, len1, len2;
396 	const u16 *dstr1, *dstr2;
397 	const char *astr1, *astr2;
398 	u16 c1, c2;
399 	wchar_t c;
400 
401 	casefold = test_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags);
402 	decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
403 	astr1 = str;
404 	len1 = len;
405 	astr2 = name->name;
406 	len2 = name->len;
407 	dsize1 = dsize2 = 0;
408 	dstr1 = dstr2 = NULL;
409 
410 	while (len1 > 0 && len2 > 0) {
411 		if (!dsize1) {
412 			size = asc2unichar(sb, astr1, len1, &c);
413 			astr1 += size;
414 			len1 -= size;
415 
416 			if (decompose)
417 				dstr1 = decompose_unichar(c, &dsize1);
418 			if (!decompose || !dstr1) {
419 				c1 = c;
420 				dstr1 = &c1;
421 				dsize1 = 1;
422 			}
423 		}
424 
425 		if (!dsize2) {
426 			size = asc2unichar(sb, astr2, len2, &c);
427 			astr2 += size;
428 			len2 -= size;
429 
430 			if (decompose)
431 				dstr2 = decompose_unichar(c, &dsize2);
432 			if (!decompose || !dstr2) {
433 				c2 = c;
434 				dstr2 = &c2;
435 				dsize2 = 1;
436 			}
437 		}
438 
439 		c1 = *dstr1;
440 		c2 = *dstr2;
441 		if (casefold) {
442 			c1 = case_fold(c1);
443 			if (!c1) {
444 				dstr1++;
445 				dsize1--;
446 				continue;
447 			}
448 			c2 = case_fold(c2);
449 			if (!c2) {
450 				dstr2++;
451 				dsize2--;
452 				continue;
453 			}
454 		}
455 		if (c1 < c2)
456 			return -1;
457 		else if (c1 > c2)
458 			return 1;
459 
460 		dstr1++;
461 		dsize1--;
462 		dstr2++;
463 		dsize2--;
464 	}
465 
466 	if (len1 < len2)
467 		return -1;
468 	if (len1 > len2)
469 		return 1;
470 	return 0;
471 }
472