1 /* 2 * unicode.c 3 * 4 * PURPOSE 5 * Routines for converting between UTF-8 and OSTA Compressed Unicode. 6 * Also handles filename mangling 7 * 8 * DESCRIPTION 9 * OSTA Compressed Unicode is explained in the OSTA UDF specification. 10 * http://www.osta.org/ 11 * UTF-8 is explained in the IETF RFC XXXX. 12 * ftp://ftp.internic.net/rfc/rfcxxxx.txt 13 * 14 * COPYRIGHT 15 * This file is distributed under the terms of the GNU General Public 16 * License (GPL). Copies of the GPL can be obtained from: 17 * ftp://prep.ai.mit.edu/pub/gnu/GPL 18 * Each contributing author retains all rights to their own work. 19 */ 20 21 #include "udfdecl.h" 22 23 #include <linux/kernel.h> 24 #include <linux/string.h> /* for memset */ 25 #include <linux/nls.h> 26 #include <linux/crc-itu-t.h> 27 #include <linux/slab.h> 28 29 #include "udf_sb.h" 30 31 static int udf_translate_to_linux(uint8_t *, int, const uint8_t *, int, 32 const uint8_t *, int); 33 34 static int udf_uni2char_utf8(wchar_t uni, 35 unsigned char *out, 36 int boundlen) 37 { 38 int u_len = 0; 39 40 if (boundlen <= 0) 41 return -ENAMETOOLONG; 42 43 if (uni < 0x80) { 44 out[u_len++] = (unsigned char)uni; 45 } else if (uni < 0x800) { 46 if (boundlen < 2) 47 return -ENAMETOOLONG; 48 out[u_len++] = (unsigned char)(0xc0 | (uni >> 6)); 49 out[u_len++] = (unsigned char)(0x80 | (uni & 0x3f)); 50 } else { 51 if (boundlen < 3) 52 return -ENAMETOOLONG; 53 out[u_len++] = (unsigned char)(0xe0 | (uni >> 12)); 54 out[u_len++] = (unsigned char)(0x80 | ((uni >> 6) & 0x3f)); 55 out[u_len++] = (unsigned char)(0x80 | (uni & 0x3f)); 56 } 57 return u_len; 58 } 59 60 static int udf_char2uni_utf8(const unsigned char *in, 61 int boundlen, 62 wchar_t *uni) 63 { 64 unsigned int utf_char; 65 unsigned char c; 66 int utf_cnt, u_len; 67 68 utf_char = 0; 69 utf_cnt = 0; 70 for (u_len = 0; u_len < boundlen;) { 71 c = in[u_len++]; 72 73 /* Complete a multi-byte UTF-8 character */ 74 if (utf_cnt) { 75 utf_char = (utf_char << 6) | (c & 0x3f); 76 if (--utf_cnt) 77 continue; 78 } else { 79 /* Check for a multi-byte UTF-8 character */ 80 if (c & 0x80) { 81 /* Start a multi-byte UTF-8 character */ 82 if ((c & 0xe0) == 0xc0) { 83 utf_char = c & 0x1f; 84 utf_cnt = 1; 85 } else if ((c & 0xf0) == 0xe0) { 86 utf_char = c & 0x0f; 87 utf_cnt = 2; 88 } else if ((c & 0xf8) == 0xf0) { 89 utf_char = c & 0x07; 90 utf_cnt = 3; 91 } else if ((c & 0xfc) == 0xf8) { 92 utf_char = c & 0x03; 93 utf_cnt = 4; 94 } else if ((c & 0xfe) == 0xfc) { 95 utf_char = c & 0x01; 96 utf_cnt = 5; 97 } else { 98 utf_cnt = -1; 99 break; 100 } 101 continue; 102 } else { 103 /* Single byte UTF-8 character (most common) */ 104 utf_char = c; 105 } 106 } 107 *uni = utf_char; 108 break; 109 } 110 if (utf_cnt) { 111 *uni = '?'; 112 return -EINVAL; 113 } 114 return u_len; 115 } 116 117 static int udf_name_from_CS0(uint8_t *str_o, int str_max_len, 118 const uint8_t *ocu, int ocu_len, 119 int (*conv_f)(wchar_t, unsigned char *, int)) 120 { 121 uint8_t cmp_id; 122 int i, len; 123 int str_o_len = 0; 124 125 if (str_max_len <= 0) 126 return 0; 127 128 if (ocu_len == 0) { 129 memset(str_o, 0, str_max_len); 130 return 0; 131 } 132 133 cmp_id = ocu[0]; 134 if (cmp_id != 8 && cmp_id != 16) { 135 memset(str_o, 0, str_max_len); 136 pr_err("unknown compression code (%d) stri=%s\n", cmp_id, ocu); 137 return -EINVAL; 138 } 139 140 for (i = 1; (i < ocu_len) && (str_o_len < str_max_len);) { 141 /* Expand OSTA compressed Unicode to Unicode */ 142 uint32_t c = ocu[i++]; 143 if (cmp_id == 16) 144 c = (c << 8) | ocu[i++]; 145 146 len = conv_f(c, &str_o[str_o_len], str_max_len - str_o_len); 147 /* Valid character? */ 148 if (len >= 0) 149 str_o_len += len; 150 else if (len == -ENAMETOOLONG) 151 break; 152 else 153 str_o[str_o_len++] = '?'; 154 } 155 156 return str_o_len; 157 } 158 159 static int udf_name_to_CS0(uint8_t *ocu, int ocu_max_len, 160 const uint8_t *str_i, int str_len, 161 int (*conv_f)(const unsigned char *, int, wchar_t *)) 162 { 163 int i, len; 164 unsigned int max_val; 165 wchar_t uni_char; 166 int u_len, u_ch; 167 168 if (ocu_max_len <= 0) 169 return 0; 170 171 memset(ocu, 0, ocu_max_len); 172 ocu[0] = 8; 173 max_val = 0xff; 174 u_ch = 1; 175 176 try_again: 177 u_len = 1; 178 for (i = 0; i < str_len; i++) { 179 /* Name didn't fit? */ 180 if (u_len + u_ch > ocu_max_len) 181 return 0; 182 len = conv_f(&str_i[i], str_len - i, &uni_char); 183 if (!len) 184 continue; 185 /* Invalid character, deal with it */ 186 if (len < 0) { 187 len = 1; 188 uni_char = '?'; 189 } 190 191 if (uni_char > max_val) { 192 max_val = 0xffff; 193 ocu[0] = 0x10; 194 u_ch = 2; 195 goto try_again; 196 } 197 198 if (max_val == 0xffff) 199 ocu[u_len++] = (uint8_t)(uni_char >> 8); 200 ocu[u_len++] = (uint8_t)(uni_char & 0xff); 201 i += len - 1; 202 } 203 204 return u_len; 205 } 206 207 int udf_CS0toUTF8(uint8_t *utf_o, int o_len, const uint8_t *ocu_i, int i_len) 208 { 209 return udf_name_from_CS0(utf_o, o_len, ocu_i, i_len, 210 udf_uni2char_utf8); 211 } 212 213 int udf_get_filename(struct super_block *sb, const uint8_t *sname, int slen, 214 uint8_t *dname, int dlen) 215 { 216 uint8_t *filename; 217 int (*conv_f)(wchar_t, unsigned char *, int); 218 int ret; 219 220 if (!slen) 221 return -EIO; 222 223 if (dlen <= 0) 224 return 0; 225 226 filename = kmalloc(dlen, GFP_NOFS); 227 if (!filename) 228 return -ENOMEM; 229 230 if (UDF_QUERY_FLAG(sb, UDF_FLAG_UTF8)) { 231 conv_f = udf_uni2char_utf8; 232 } else if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP)) { 233 conv_f = UDF_SB(sb)->s_nls_map->uni2char; 234 } else 235 BUG(); 236 237 ret = udf_name_from_CS0(filename, dlen, sname, slen, conv_f); 238 if (ret < 0) { 239 udf_debug("Failed in udf_get_filename: sname = %s\n", sname); 240 goto out2; 241 } 242 243 ret = udf_translate_to_linux(dname, dlen, filename, dlen, 244 sname + 1, slen - 1); 245 /* Zero length filename isn't valid... */ 246 if (ret == 0) 247 ret = -EINVAL; 248 out2: 249 kfree(filename); 250 return ret; 251 } 252 253 int udf_put_filename(struct super_block *sb, const uint8_t *sname, int slen, 254 uint8_t *dname, int dlen) 255 { 256 int (*conv_f)(const unsigned char *, int, wchar_t *); 257 258 if (UDF_QUERY_FLAG(sb, UDF_FLAG_UTF8)) { 259 conv_f = udf_char2uni_utf8; 260 } else if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP)) { 261 conv_f = UDF_SB(sb)->s_nls_map->char2uni; 262 } else 263 BUG(); 264 265 return udf_name_to_CS0(dname, dlen, sname, slen, conv_f); 266 } 267 268 #define ILLEGAL_CHAR_MARK '_' 269 #define EXT_MARK '.' 270 #define CRC_MARK '#' 271 #define EXT_SIZE 5 272 /* Number of chars we need to store generated CRC to make filename unique */ 273 #define CRC_LEN 5 274 275 static int udf_translate_to_linux(uint8_t *newName, int newLen, 276 const uint8_t *udfName, int udfLen, 277 const uint8_t *fidName, int fidNameLen) 278 { 279 int index, newIndex = 0, needsCRC = 0; 280 int extIndex = 0, newExtIndex = 0, hasExt = 0; 281 unsigned short valueCRC; 282 uint8_t curr; 283 284 if (udfName[0] == '.' && 285 (udfLen == 1 || (udfLen == 2 && udfName[1] == '.'))) { 286 needsCRC = 1; 287 newIndex = udfLen; 288 memcpy(newName, udfName, udfLen); 289 } else { 290 for (index = 0; index < udfLen; index++) { 291 curr = udfName[index]; 292 if (curr == '/' || curr == 0) { 293 needsCRC = 1; 294 curr = ILLEGAL_CHAR_MARK; 295 while (index + 1 < udfLen && 296 (udfName[index + 1] == '/' || 297 udfName[index + 1] == 0)) 298 index++; 299 } 300 if (curr == EXT_MARK && 301 (udfLen - index - 1) <= EXT_SIZE) { 302 if (udfLen == index + 1) 303 hasExt = 0; 304 else { 305 hasExt = 1; 306 extIndex = index; 307 newExtIndex = newIndex; 308 } 309 } 310 if (newIndex < newLen) 311 newName[newIndex++] = curr; 312 else 313 needsCRC = 1; 314 } 315 } 316 if (needsCRC) { 317 uint8_t ext[EXT_SIZE]; 318 int localExtIndex = 0; 319 320 if (hasExt) { 321 int maxFilenameLen; 322 for (index = 0; 323 index < EXT_SIZE && extIndex + index + 1 < udfLen; 324 index++) { 325 curr = udfName[extIndex + index + 1]; 326 327 if (curr == '/' || curr == 0) { 328 needsCRC = 1; 329 curr = ILLEGAL_CHAR_MARK; 330 while (extIndex + index + 2 < udfLen && 331 (index + 1 < EXT_SIZE && 332 (udfName[extIndex + index + 2] == '/' || 333 udfName[extIndex + index + 2] == 0))) 334 index++; 335 } 336 ext[localExtIndex++] = curr; 337 } 338 maxFilenameLen = newLen - CRC_LEN - localExtIndex; 339 if (newIndex > maxFilenameLen) 340 newIndex = maxFilenameLen; 341 else 342 newIndex = newExtIndex; 343 } else if (newIndex > newLen - CRC_LEN) 344 newIndex = newLen - CRC_LEN; 345 newName[newIndex++] = CRC_MARK; 346 valueCRC = crc_itu_t(0, fidName, fidNameLen); 347 newName[newIndex++] = hex_asc_upper_hi(valueCRC >> 8); 348 newName[newIndex++] = hex_asc_upper_lo(valueCRC >> 8); 349 newName[newIndex++] = hex_asc_upper_hi(valueCRC); 350 newName[newIndex++] = hex_asc_upper_lo(valueCRC); 351 352 if (hasExt) { 353 newName[newIndex++] = EXT_MARK; 354 for (index = 0; index < localExtIndex; index++) 355 newName[newIndex++] = ext[index]; 356 } 357 } 358 359 return newIndex; 360 } 361