1 /* 2 * linux/fs/hfsplus/unicode.c 3 * 4 * Copyright (C) 2001 5 * Brad Boyer (flar@allandria.com) 6 * (C) 2003 Ardis Technologies <roman@ardistech.com> 7 * 8 * Handler routines for unicode strings 9 */ 10 11 #include <linux/types.h> 12 #include <linux/nls.h> 13 #include "hfsplus_fs.h" 14 #include "hfsplus_raw.h" 15 16 /* Fold the case of a unicode char, given the 16 bit value */ 17 /* Returns folded char, or 0 if ignorable */ 18 static inline u16 case_fold(u16 c) 19 { 20 u16 tmp; 21 22 tmp = hfsplus_case_fold_table[c >> 8]; 23 if (tmp) 24 tmp = hfsplus_case_fold_table[tmp + (c & 0xff)]; 25 else 26 tmp = c; 27 return tmp; 28 } 29 30 /* Compare unicode strings, return values like normal strcmp */ 31 int hfsplus_strcasecmp(const struct hfsplus_unistr *s1, 32 const struct hfsplus_unistr *s2) 33 { 34 u16 len1, len2, c1, c2; 35 const hfsplus_unichr *p1, *p2; 36 37 len1 = be16_to_cpu(s1->length); 38 len2 = be16_to_cpu(s2->length); 39 p1 = s1->unicode; 40 p2 = s2->unicode; 41 42 while (1) { 43 c1 = c2 = 0; 44 45 while (len1 && !c1) { 46 c1 = case_fold(be16_to_cpu(*p1)); 47 p1++; 48 len1--; 49 } 50 while (len2 && !c2) { 51 c2 = case_fold(be16_to_cpu(*p2)); 52 p2++; 53 len2--; 54 } 55 56 if (c1 != c2) 57 return (c1 < c2) ? -1 : 1; 58 if (!c1 && !c2) 59 return 0; 60 } 61 } 62 63 /* Compare names as a sequence of 16-bit unsigned integers */ 64 int hfsplus_strcmp(const struct hfsplus_unistr *s1, 65 const struct hfsplus_unistr *s2) 66 { 67 u16 len1, len2, c1, c2; 68 const hfsplus_unichr *p1, *p2; 69 int len; 70 71 len1 = be16_to_cpu(s1->length); 72 len2 = be16_to_cpu(s2->length); 73 p1 = s1->unicode; 74 p2 = s2->unicode; 75 76 for (len = min(len1, len2); len > 0; len--) { 77 c1 = be16_to_cpu(*p1); 78 c2 = be16_to_cpu(*p2); 79 if (c1 != c2) 80 return c1 < c2 ? -1 : 1; 81 p1++; 82 p2++; 83 } 84 85 return len1 < len2 ? -1 : 86 len1 > len2 ? 1 : 0; 87 } 88 89 90 #define Hangul_SBase 0xac00 91 #define Hangul_LBase 0x1100 92 #define Hangul_VBase 0x1161 93 #define Hangul_TBase 0x11a7 94 #define Hangul_SCount 11172 95 #define Hangul_LCount 19 96 #define Hangul_VCount 21 97 #define Hangul_TCount 28 98 #define Hangul_NCount (Hangul_VCount * Hangul_TCount) 99 100 101 static u16 *hfsplus_compose_lookup(u16 *p, u16 cc) 102 { 103 int i, s, e; 104 105 s = 1; 106 e = p[1]; 107 if (!e || cc < p[s * 2] || cc > p[e * 2]) 108 return NULL; 109 do { 110 i = (s + e) / 2; 111 if (cc > p[i * 2]) 112 s = i + 1; 113 else if (cc < p[i * 2]) 114 e = i - 1; 115 else 116 return hfsplus_compose_table + p[i * 2 + 1]; 117 } while (s <= e); 118 return NULL; 119 } 120 121 int hfsplus_uni2asc(struct super_block *sb, 122 const struct hfsplus_unistr *ustr, 123 char *astr, int *len_p) 124 { 125 const hfsplus_unichr *ip; 126 struct nls_table *nls = HFSPLUS_SB(sb)->nls; 127 u8 *op; 128 u16 cc, c0, c1; 129 u16 *ce1, *ce2; 130 int i, len, ustrlen, res, compose; 131 132 op = astr; 133 ip = ustr->unicode; 134 ustrlen = be16_to_cpu(ustr->length); 135 len = *len_p; 136 ce1 = NULL; 137 compose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags); 138 139 while (ustrlen > 0) { 140 c0 = be16_to_cpu(*ip++); 141 ustrlen--; 142 /* search for single decomposed char */ 143 if (likely(compose)) 144 ce1 = hfsplus_compose_lookup(hfsplus_compose_table, c0); 145 if (ce1) 146 cc = ce1[0]; 147 else 148 cc = 0; 149 if (cc) { 150 /* start of a possibly decomposed Hangul char */ 151 if (cc != 0xffff) 152 goto done; 153 if (!ustrlen) 154 goto same; 155 c1 = be16_to_cpu(*ip) - Hangul_VBase; 156 if (c1 < Hangul_VCount) { 157 /* compose the Hangul char */ 158 cc = (c0 - Hangul_LBase) * Hangul_VCount; 159 cc = (cc + c1) * Hangul_TCount; 160 cc += Hangul_SBase; 161 ip++; 162 ustrlen--; 163 if (!ustrlen) 164 goto done; 165 c1 = be16_to_cpu(*ip) - Hangul_TBase; 166 if (c1 > 0 && c1 < Hangul_TCount) { 167 cc += c1; 168 ip++; 169 ustrlen--; 170 } 171 goto done; 172 } 173 } 174 while (1) { 175 /* main loop for common case of not composed chars */ 176 if (!ustrlen) 177 goto same; 178 c1 = be16_to_cpu(*ip); 179 if (likely(compose)) 180 ce1 = hfsplus_compose_lookup( 181 hfsplus_compose_table, c1); 182 if (ce1) 183 break; 184 switch (c0) { 185 case 0: 186 c0 = 0x2400; 187 break; 188 case '/': 189 c0 = ':'; 190 break; 191 } 192 res = nls->uni2char(c0, op, len); 193 if (res < 0) { 194 if (res == -ENAMETOOLONG) 195 goto out; 196 *op = '?'; 197 res = 1; 198 } 199 op += res; 200 len -= res; 201 c0 = c1; 202 ip++; 203 ustrlen--; 204 } 205 ce2 = hfsplus_compose_lookup(ce1, c0); 206 if (ce2) { 207 i = 1; 208 while (i < ustrlen) { 209 ce1 = hfsplus_compose_lookup(ce2, 210 be16_to_cpu(ip[i])); 211 if (!ce1) 212 break; 213 i++; 214 ce2 = ce1; 215 } 216 cc = ce2[0]; 217 if (cc) { 218 ip += i; 219 ustrlen -= i; 220 goto done; 221 } 222 } 223 same: 224 switch (c0) { 225 case 0: 226 cc = 0x2400; 227 break; 228 case '/': 229 cc = ':'; 230 break; 231 default: 232 cc = c0; 233 } 234 done: 235 res = nls->uni2char(cc, op, len); 236 if (res < 0) { 237 if (res == -ENAMETOOLONG) 238 goto out; 239 *op = '?'; 240 res = 1; 241 } 242 op += res; 243 len -= res; 244 } 245 res = 0; 246 out: 247 *len_p = (char *)op - astr; 248 return res; 249 } 250 251 /* 252 * Convert one or more ASCII characters into a single unicode character. 253 * Returns the number of ASCII characters corresponding to the unicode char. 254 */ 255 static inline int asc2unichar(struct super_block *sb, const char *astr, int len, 256 wchar_t *uc) 257 { 258 int size = HFSPLUS_SB(sb)->nls->char2uni(astr, len, uc); 259 if (size <= 0) { 260 *uc = '?'; 261 size = 1; 262 } 263 switch (*uc) { 264 case 0x2400: 265 *uc = 0; 266 break; 267 case ':': 268 *uc = '/'; 269 break; 270 } 271 return size; 272 } 273 274 /* Decomposes a single unicode character. */ 275 static inline u16 *decompose_unichar(wchar_t uc, int *size) 276 { 277 int off; 278 279 off = hfsplus_decompose_table[(uc >> 12) & 0xf]; 280 if (off == 0 || off == 0xffff) 281 return NULL; 282 283 off = hfsplus_decompose_table[off + ((uc >> 8) & 0xf)]; 284 if (!off) 285 return NULL; 286 287 off = hfsplus_decompose_table[off + ((uc >> 4) & 0xf)]; 288 if (!off) 289 return NULL; 290 291 off = hfsplus_decompose_table[off + (uc & 0xf)]; 292 *size = off & 3; 293 if (*size == 0) 294 return NULL; 295 return hfsplus_decompose_table + (off / 4); 296 } 297 298 int hfsplus_asc2uni(struct super_block *sb, 299 struct hfsplus_unistr *ustr, int max_unistr_len, 300 const char *astr, int len) 301 { 302 int size, dsize, decompose; 303 u16 *dstr, outlen = 0; 304 wchar_t c; 305 306 decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags); 307 while (outlen < max_unistr_len && len > 0) { 308 size = asc2unichar(sb, astr, len, &c); 309 310 if (decompose) 311 dstr = decompose_unichar(c, &dsize); 312 else 313 dstr = NULL; 314 if (dstr) { 315 if (outlen + dsize > max_unistr_len) 316 break; 317 do { 318 ustr->unicode[outlen++] = cpu_to_be16(*dstr++); 319 } while (--dsize > 0); 320 } else 321 ustr->unicode[outlen++] = cpu_to_be16(c); 322 323 astr += size; 324 len -= size; 325 } 326 ustr->length = cpu_to_be16(outlen); 327 if (len > 0) 328 return -ENAMETOOLONG; 329 return 0; 330 } 331 332 /* 333 * Hash a string to an integer as appropriate for the HFS+ filesystem. 334 * Composed unicode characters are decomposed and case-folding is performed 335 * if the appropriate bits are (un)set on the superblock. 336 */ 337 int hfsplus_hash_dentry(const struct dentry *dentry, const struct inode *inode, 338 struct qstr *str) 339 { 340 struct super_block *sb = dentry->d_sb; 341 const char *astr; 342 const u16 *dstr; 343 int casefold, decompose, size, len; 344 unsigned long hash; 345 wchar_t c; 346 u16 c2; 347 348 casefold = test_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags); 349 decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags); 350 hash = init_name_hash(); 351 astr = str->name; 352 len = str->len; 353 while (len > 0) { 354 int uninitialized_var(dsize); 355 size = asc2unichar(sb, astr, len, &c); 356 astr += size; 357 len -= size; 358 359 if (decompose) 360 dstr = decompose_unichar(c, &dsize); 361 else 362 dstr = NULL; 363 if (dstr) { 364 do { 365 c2 = *dstr++; 366 if (casefold) 367 c2 = case_fold(c2); 368 if (!casefold || c2) 369 hash = partial_name_hash(c2, hash); 370 } while (--dsize > 0); 371 } else { 372 c2 = c; 373 if (casefold) 374 c2 = case_fold(c2); 375 if (!casefold || c2) 376 hash = partial_name_hash(c2, hash); 377 } 378 } 379 str->hash = end_name_hash(hash); 380 381 return 0; 382 } 383 384 /* 385 * Compare strings with HFS+ filename ordering. 386 * Composed unicode characters are decomposed and case-folding is performed 387 * if the appropriate bits are (un)set on the superblock. 388 */ 389 int hfsplus_compare_dentry(const struct dentry *parent, 390 const struct inode *pinode, 391 const struct dentry *dentry, const struct inode *inode, 392 unsigned int len, const char *str, const struct qstr *name) 393 { 394 struct super_block *sb = parent->d_sb; 395 int casefold, decompose, size; 396 int dsize1, dsize2, len1, len2; 397 const u16 *dstr1, *dstr2; 398 const char *astr1, *astr2; 399 u16 c1, c2; 400 wchar_t c; 401 402 casefold = test_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags); 403 decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags); 404 astr1 = str; 405 len1 = len; 406 astr2 = name->name; 407 len2 = name->len; 408 dsize1 = dsize2 = 0; 409 dstr1 = dstr2 = NULL; 410 411 while (len1 > 0 && len2 > 0) { 412 if (!dsize1) { 413 size = asc2unichar(sb, astr1, len1, &c); 414 astr1 += size; 415 len1 -= size; 416 417 if (decompose) 418 dstr1 = decompose_unichar(c, &dsize1); 419 if (!decompose || !dstr1) { 420 c1 = c; 421 dstr1 = &c1; 422 dsize1 = 1; 423 } 424 } 425 426 if (!dsize2) { 427 size = asc2unichar(sb, astr2, len2, &c); 428 astr2 += size; 429 len2 -= size; 430 431 if (decompose) 432 dstr2 = decompose_unichar(c, &dsize2); 433 if (!decompose || !dstr2) { 434 c2 = c; 435 dstr2 = &c2; 436 dsize2 = 1; 437 } 438 } 439 440 c1 = *dstr1; 441 c2 = *dstr2; 442 if (casefold) { 443 c1 = case_fold(c1); 444 if (!c1) { 445 dstr1++; 446 dsize1--; 447 continue; 448 } 449 c2 = case_fold(c2); 450 if (!c2) { 451 dstr2++; 452 dsize2--; 453 continue; 454 } 455 } 456 if (c1 < c2) 457 return -1; 458 else if (c1 > c2) 459 return 1; 460 461 dstr1++; 462 dsize1--; 463 dstr2++; 464 dsize2--; 465 } 466 467 if (len1 < len2) 468 return -1; 469 if (len1 > len2) 470 return 1; 471 return 0; 472 } 473