1 /* 2 * linux/fs/hfsplus/unicode.c 3 * 4 * Copyright (C) 2001 5 * Brad Boyer (flar@allandria.com) 6 * (C) 2003 Ardis Technologies <roman@ardistech.com> 7 * 8 * Handler routines for unicode strings 9 */ 10 11 #include <linux/types.h> 12 #include <linux/nls.h> 13 #include "hfsplus_fs.h" 14 #include "hfsplus_raw.h" 15 16 /* Fold the case of a unicode char, given the 16 bit value */ 17 /* Returns folded char, or 0 if ignorable */ 18 static inline u16 case_fold(u16 c) 19 { 20 u16 tmp; 21 22 tmp = hfsplus_case_fold_table[c >> 8]; 23 if (tmp) 24 tmp = hfsplus_case_fold_table[tmp + (c & 0xff)]; 25 else 26 tmp = c; 27 return tmp; 28 } 29 30 /* Compare unicode strings, return values like normal strcmp */ 31 int hfsplus_strcasecmp(const struct hfsplus_unistr *s1, 32 const struct hfsplus_unistr *s2) 33 { 34 u16 len1, len2, c1, c2; 35 const hfsplus_unichr *p1, *p2; 36 37 len1 = be16_to_cpu(s1->length); 38 len2 = be16_to_cpu(s2->length); 39 p1 = s1->unicode; 40 p2 = s2->unicode; 41 42 while (1) { 43 c1 = c2 = 0; 44 45 while (len1 && !c1) { 46 c1 = case_fold(be16_to_cpu(*p1)); 47 p1++; 48 len1--; 49 } 50 while (len2 && !c2) { 51 c2 = case_fold(be16_to_cpu(*p2)); 52 p2++; 53 len2--; 54 } 55 56 if (c1 != c2) 57 return (c1 < c2) ? -1 : 1; 58 if (!c1 && !c2) 59 return 0; 60 } 61 } 62 63 /* Compare names as a sequence of 16-bit unsigned integers */ 64 int hfsplus_strcmp(const struct hfsplus_unistr *s1, 65 const struct hfsplus_unistr *s2) 66 { 67 u16 len1, len2, c1, c2; 68 const hfsplus_unichr *p1, *p2; 69 int len; 70 71 len1 = be16_to_cpu(s1->length); 72 len2 = be16_to_cpu(s2->length); 73 p1 = s1->unicode; 74 p2 = s2->unicode; 75 76 for (len = min(len1, len2); len > 0; len--) { 77 c1 = be16_to_cpu(*p1); 78 c2 = be16_to_cpu(*p2); 79 if (c1 != c2) 80 return c1 < c2 ? -1 : 1; 81 p1++; 82 p2++; 83 } 84 85 return len1 < len2 ? -1 : 86 len1 > len2 ? 1 : 0; 87 } 88 89 90 #define Hangul_SBase 0xac00 91 #define Hangul_LBase 0x1100 92 #define Hangul_VBase 0x1161 93 #define Hangul_TBase 0x11a7 94 #define Hangul_SCount 11172 95 #define Hangul_LCount 19 96 #define Hangul_VCount 21 97 #define Hangul_TCount 28 98 #define Hangul_NCount (Hangul_VCount * Hangul_TCount) 99 100 101 static u16 *hfsplus_compose_lookup(u16 *p, u16 cc) 102 { 103 int i, s, e; 104 105 s = 1; 106 e = p[1]; 107 if (!e || cc < p[s * 2] || cc > p[e * 2]) 108 return NULL; 109 do { 110 i = (s + e) / 2; 111 if (cc > p[i * 2]) 112 s = i + 1; 113 else if (cc < p[i * 2]) 114 e = i - 1; 115 else 116 return hfsplus_compose_table + p[i * 2 + 1]; 117 } while (s <= e); 118 return NULL; 119 } 120 121 int hfsplus_uni2asc(struct super_block *sb, const struct hfsplus_unistr *ustr, char *astr, int *len_p) 122 { 123 const hfsplus_unichr *ip; 124 struct nls_table *nls = HFSPLUS_SB(sb).nls; 125 u8 *op; 126 u16 cc, c0, c1; 127 u16 *ce1, *ce2; 128 int i, len, ustrlen, res, compose; 129 130 op = astr; 131 ip = ustr->unicode; 132 ustrlen = be16_to_cpu(ustr->length); 133 len = *len_p; 134 ce1 = NULL; 135 compose = !(HFSPLUS_SB(sb).flags & HFSPLUS_SB_NODECOMPOSE); 136 137 while (ustrlen > 0) { 138 c0 = be16_to_cpu(*ip++); 139 ustrlen--; 140 /* search for single decomposed char */ 141 if (likely(compose)) 142 ce1 = hfsplus_compose_lookup(hfsplus_compose_table, c0); 143 if (ce1 && (cc = ce1[0])) { 144 /* start of a possibly decomposed Hangul char */ 145 if (cc != 0xffff) 146 goto done; 147 if (!ustrlen) 148 goto same; 149 c1 = be16_to_cpu(*ip) - Hangul_VBase; 150 if (c1 < Hangul_VCount) { 151 /* compose the Hangul char */ 152 cc = (c0 - Hangul_LBase) * Hangul_VCount; 153 cc = (cc + c1) * Hangul_TCount; 154 cc += Hangul_SBase; 155 ip++; 156 ustrlen--; 157 if (!ustrlen) 158 goto done; 159 c1 = be16_to_cpu(*ip) - Hangul_TBase; 160 if (c1 > 0 && c1 < Hangul_TCount) { 161 cc += c1; 162 ip++; 163 ustrlen--; 164 } 165 goto done; 166 } 167 } 168 while (1) { 169 /* main loop for common case of not composed chars */ 170 if (!ustrlen) 171 goto same; 172 c1 = be16_to_cpu(*ip); 173 if (likely(compose)) 174 ce1 = hfsplus_compose_lookup(hfsplus_compose_table, c1); 175 if (ce1) 176 break; 177 switch (c0) { 178 case 0: 179 c0 = 0x2400; 180 break; 181 case '/': 182 c0 = ':'; 183 break; 184 } 185 res = nls->uni2char(c0, op, len); 186 if (res < 0) { 187 if (res == -ENAMETOOLONG) 188 goto out; 189 *op = '?'; 190 res = 1; 191 } 192 op += res; 193 len -= res; 194 c0 = c1; 195 ip++; 196 ustrlen--; 197 } 198 ce2 = hfsplus_compose_lookup(ce1, c0); 199 if (ce2) { 200 i = 1; 201 while (i < ustrlen) { 202 ce1 = hfsplus_compose_lookup(ce2, be16_to_cpu(ip[i])); 203 if (!ce1) 204 break; 205 i++; 206 ce2 = ce1; 207 } 208 if ((cc = ce2[0])) { 209 ip += i; 210 ustrlen -= i; 211 goto done; 212 } 213 } 214 same: 215 switch (c0) { 216 case 0: 217 cc = 0x2400; 218 break; 219 case '/': 220 cc = ':'; 221 break; 222 default: 223 cc = c0; 224 } 225 done: 226 res = nls->uni2char(cc, op, len); 227 if (res < 0) { 228 if (res == -ENAMETOOLONG) 229 goto out; 230 *op = '?'; 231 res = 1; 232 } 233 op += res; 234 len -= res; 235 } 236 res = 0; 237 out: 238 *len_p = (char *)op - astr; 239 return res; 240 } 241 242 /* 243 * Convert one or more ASCII characters into a single unicode character. 244 * Returns the number of ASCII characters corresponding to the unicode char. 245 */ 246 static inline int asc2unichar(struct super_block *sb, const char *astr, int len, 247 wchar_t *uc) 248 { 249 int size = HFSPLUS_SB(sb).nls->char2uni(astr, len, uc); 250 if (size <= 0) { 251 *uc = '?'; 252 size = 1; 253 } 254 switch (*uc) { 255 case 0x2400: 256 *uc = 0; 257 break; 258 case ':': 259 *uc = '/'; 260 break; 261 } 262 return size; 263 } 264 265 /* Decomposes a single unicode character. */ 266 static inline u16 *decompose_unichar(wchar_t uc, int *size) 267 { 268 int off; 269 270 off = hfsplus_decompose_table[(uc >> 12) & 0xf]; 271 if (off == 0 || off == 0xffff) 272 return NULL; 273 274 off = hfsplus_decompose_table[off + ((uc >> 8) & 0xf)]; 275 if (!off) 276 return NULL; 277 278 off = hfsplus_decompose_table[off + ((uc >> 4) & 0xf)]; 279 if (!off) 280 return NULL; 281 282 off = hfsplus_decompose_table[off + (uc & 0xf)]; 283 *size = off & 3; 284 if (*size == 0) 285 return NULL; 286 return hfsplus_decompose_table + (off / 4); 287 } 288 289 int hfsplus_asc2uni(struct super_block *sb, struct hfsplus_unistr *ustr, 290 const char *astr, int len) 291 { 292 int size, dsize, decompose; 293 u16 *dstr, outlen = 0; 294 wchar_t c; 295 296 decompose = !(HFSPLUS_SB(sb).flags & HFSPLUS_SB_NODECOMPOSE); 297 while (outlen < HFSPLUS_MAX_STRLEN && len > 0) { 298 size = asc2unichar(sb, astr, len, &c); 299 300 if (decompose && (dstr = decompose_unichar(c, &dsize))) { 301 if (outlen + dsize > HFSPLUS_MAX_STRLEN) 302 break; 303 do { 304 ustr->unicode[outlen++] = cpu_to_be16(*dstr++); 305 } while (--dsize > 0); 306 } else 307 ustr->unicode[outlen++] = cpu_to_be16(c); 308 309 astr += size; 310 len -= size; 311 } 312 ustr->length = cpu_to_be16(outlen); 313 if (len > 0) 314 return -ENAMETOOLONG; 315 return 0; 316 } 317 318 /* 319 * Hash a string to an integer as appropriate for the HFS+ filesystem. 320 * Composed unicode characters are decomposed and case-folding is performed 321 * if the appropriate bits are (un)set on the superblock. 322 */ 323 int hfsplus_hash_dentry(struct dentry *dentry, struct qstr *str) 324 { 325 struct super_block *sb = dentry->d_sb; 326 const char *astr; 327 const u16 *dstr; 328 int casefold, decompose, size, dsize, len; 329 unsigned long hash; 330 wchar_t c; 331 u16 c2; 332 333 casefold = (HFSPLUS_SB(sb).flags & HFSPLUS_SB_CASEFOLD); 334 decompose = !(HFSPLUS_SB(sb).flags & HFSPLUS_SB_NODECOMPOSE); 335 hash = init_name_hash(); 336 astr = str->name; 337 len = str->len; 338 while (len > 0) { 339 size = asc2unichar(sb, astr, len, &c); 340 astr += size; 341 len -= size; 342 343 if (decompose && (dstr = decompose_unichar(c, &dsize))) { 344 do { 345 c2 = *dstr++; 346 if (!casefold || (c2 = case_fold(c2))) 347 hash = partial_name_hash(c2, hash); 348 } while (--dsize > 0); 349 } else { 350 c2 = c; 351 if (!casefold || (c2 = case_fold(c2))) 352 hash = partial_name_hash(c2, hash); 353 } 354 } 355 str->hash = end_name_hash(hash); 356 357 return 0; 358 } 359 360 /* 361 * Compare strings with HFS+ filename ordering. 362 * Composed unicode characters are decomposed and case-folding is performed 363 * if the appropriate bits are (un)set on the superblock. 364 */ 365 int hfsplus_compare_dentry(struct dentry *dentry, struct qstr *s1, struct qstr *s2) 366 { 367 struct super_block *sb = dentry->d_sb; 368 int casefold, decompose, size; 369 int dsize1, dsize2, len1, len2; 370 const u16 *dstr1, *dstr2; 371 const char *astr1, *astr2; 372 u16 c1, c2; 373 wchar_t c; 374 375 casefold = (HFSPLUS_SB(sb).flags & HFSPLUS_SB_CASEFOLD); 376 decompose = !(HFSPLUS_SB(sb).flags & HFSPLUS_SB_NODECOMPOSE); 377 astr1 = s1->name; 378 len1 = s1->len; 379 astr2 = s2->name; 380 len2 = s2->len; 381 dsize1 = dsize2 = 0; 382 dstr1 = dstr2 = NULL; 383 384 while (len1 > 0 && len2 > 0) { 385 if (!dsize1) { 386 size = asc2unichar(sb, astr1, len1, &c); 387 astr1 += size; 388 len1 -= size; 389 390 if (!decompose || !(dstr1 = decompose_unichar(c, &dsize1))) { 391 c1 = c; 392 dstr1 = &c1; 393 dsize1 = 1; 394 } 395 } 396 397 if (!dsize2) { 398 size = asc2unichar(sb, astr2, len2, &c); 399 astr2 += size; 400 len2 -= size; 401 402 if (!decompose || !(dstr2 = decompose_unichar(c, &dsize2))) { 403 c2 = c; 404 dstr2 = &c2; 405 dsize2 = 1; 406 } 407 } 408 409 c1 = *dstr1; 410 c2 = *dstr2; 411 if (casefold) { 412 if (!(c1 = case_fold(c1))) { 413 dstr1++; 414 dsize1--; 415 continue; 416 } 417 if (!(c2 = case_fold(c2))) { 418 dstr2++; 419 dsize2--; 420 continue; 421 } 422 } 423 if (c1 < c2) 424 return -1; 425 else if (c1 > c2) 426 return 1; 427 428 dstr1++; 429 dsize1--; 430 dstr2++; 431 dsize2--; 432 } 433 434 if (len1 < len2) 435 return -1; 436 if (len1 > len2) 437 return 1; 438 return 0; 439 } 440