1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Some of the source code in this file came from fs/cifs/cifs_unicode.c 4 * 5 * Copyright (c) International Business Machines Corp., 2000,2009 6 * Modified by Steve French (sfrench@us.ibm.com) 7 * Modified by Namjae Jeon (linkinjeon@kernel.org) 8 */ 9 #include <linux/fs.h> 10 #include <linux/slab.h> 11 #include <asm/unaligned.h> 12 #include "glob.h" 13 #include "unicode.h" 14 #include "uniupr.h" 15 #include "smb_common.h" 16 17 /* 18 * smb_utf16_bytes() - how long will a string be after conversion? 19 * @from: pointer to input string 20 * @maxbytes: don't go past this many bytes of input string 21 * @codepage: destination codepage 22 * 23 * Walk a utf16le string and return the number of bytes that the string will 24 * be after being converted to the given charset, not including any null 25 * termination required. Don't walk past maxbytes in the source buffer. 26 * 27 * Return: string length after conversion 28 */ 29 static int smb_utf16_bytes(const __le16 *from, int maxbytes, 30 const struct nls_table *codepage) 31 { 32 int i; 33 int charlen, outlen = 0; 34 int maxwords = maxbytes / 2; 35 char tmp[NLS_MAX_CHARSET_SIZE]; 36 __u16 ftmp; 37 38 for (i = 0; i < maxwords; i++) { 39 ftmp = get_unaligned_le16(&from[i]); 40 if (ftmp == 0) 41 break; 42 43 charlen = codepage->uni2char(ftmp, tmp, NLS_MAX_CHARSET_SIZE); 44 if (charlen > 0) 45 outlen += charlen; 46 else 47 outlen++; 48 } 49 50 return outlen; 51 } 52 53 /* 54 * cifs_mapchar() - convert a host-endian char to proper char in codepage 55 * @target: where converted character should be copied 56 * @src_char: 2 byte host-endian source character 57 * @cp: codepage to which character should be converted 58 * @mapchar: should character be mapped according to mapchars mount option? 59 * 60 * This function handles the conversion of a single character. It is the 61 * responsibility of the caller to ensure that the target buffer is large 62 * enough to hold the result of the conversion (at least NLS_MAX_CHARSET_SIZE). 63 * 64 * Return: string length after conversion 65 */ 66 static int 67 cifs_mapchar(char *target, const __u16 src_char, const struct nls_table *cp, 68 bool mapchar) 69 { 70 int len = 1; 71 72 if (!mapchar) 73 goto cp_convert; 74 75 /* 76 * BB: Cannot handle remapping UNI_SLASH until all the calls to 77 * build_path_from_dentry are modified, as they use slash as 78 * separator. 79 */ 80 switch (src_char) { 81 case UNI_COLON: 82 *target = ':'; 83 break; 84 case UNI_ASTERISK: 85 *target = '*'; 86 break; 87 case UNI_QUESTION: 88 *target = '?'; 89 break; 90 case UNI_PIPE: 91 *target = '|'; 92 break; 93 case UNI_GRTRTHAN: 94 *target = '>'; 95 break; 96 case UNI_LESSTHAN: 97 *target = '<'; 98 break; 99 default: 100 goto cp_convert; 101 } 102 103 out: 104 return len; 105 106 cp_convert: 107 len = cp->uni2char(src_char, target, NLS_MAX_CHARSET_SIZE); 108 if (len <= 0) { 109 *target = '?'; 110 len = 1; 111 } 112 113 goto out; 114 } 115 116 /* 117 * smb_from_utf16() - convert utf16le string to local charset 118 * @to: destination buffer 119 * @from: source buffer 120 * @tolen: destination buffer size (in bytes) 121 * @fromlen: source buffer size (in bytes) 122 * @codepage: codepage to which characters should be converted 123 * @mapchar: should characters be remapped according to the mapchars option? 124 * 125 * Convert a little-endian utf16le string (as sent by the server) to a string 126 * in the provided codepage. The tolen and fromlen parameters are to ensure 127 * that the code doesn't walk off of the end of the buffer (which is always 128 * a danger if the alignment of the source buffer is off). The destination 129 * string is always properly null terminated and fits in the destination 130 * buffer. Returns the length of the destination string in bytes (including 131 * null terminator). 132 * 133 * Note that some windows versions actually send multiword UTF-16 characters 134 * instead of straight UTF16-2. The linux nls routines however aren't able to 135 * deal with those characters properly. In the event that we get some of 136 * those characters, they won't be translated properly. 137 * 138 * Return: string length after conversion 139 */ 140 static int smb_from_utf16(char *to, const __le16 *from, int tolen, int fromlen, 141 const struct nls_table *codepage, bool mapchar) 142 { 143 int i, charlen, safelen; 144 int outlen = 0; 145 int nullsize = nls_nullsize(codepage); 146 int fromwords = fromlen / 2; 147 char tmp[NLS_MAX_CHARSET_SIZE]; 148 __u16 ftmp; 149 150 /* 151 * because the chars can be of varying widths, we need to take care 152 * not to overflow the destination buffer when we get close to the 153 * end of it. Until we get to this offset, we don't need to check 154 * for overflow however. 155 */ 156 safelen = tolen - (NLS_MAX_CHARSET_SIZE + nullsize); 157 158 for (i = 0; i < fromwords; i++) { 159 ftmp = get_unaligned_le16(&from[i]); 160 if (ftmp == 0) 161 break; 162 163 /* 164 * check to see if converting this character might make the 165 * conversion bleed into the null terminator 166 */ 167 if (outlen >= safelen) { 168 charlen = cifs_mapchar(tmp, ftmp, codepage, mapchar); 169 if ((outlen + charlen) > (tolen - nullsize)) 170 break; 171 } 172 173 /* put converted char into 'to' buffer */ 174 charlen = cifs_mapchar(&to[outlen], ftmp, codepage, mapchar); 175 outlen += charlen; 176 } 177 178 /* properly null-terminate string */ 179 for (i = 0; i < nullsize; i++) 180 to[outlen++] = 0; 181 182 return outlen; 183 } 184 185 /* 186 * smb_strtoUTF16() - Convert character string to unicode string 187 * @to: destination buffer 188 * @from: source buffer 189 * @len: destination buffer size (in bytes) 190 * @codepage: codepage to which characters should be converted 191 * 192 * Return: string length after conversion 193 */ 194 int smb_strtoUTF16(__le16 *to, const char *from, int len, 195 const struct nls_table *codepage) 196 { 197 int charlen; 198 int i; 199 wchar_t wchar_to; /* needed to quiet sparse */ 200 201 /* special case for utf8 to handle no plane0 chars */ 202 if (!strcmp(codepage->charset, "utf8")) { 203 /* 204 * convert utf8 -> utf16, we assume we have enough space 205 * as caller should have assumed conversion does not overflow 206 * in destination len is length in wchar_t units (16bits) 207 */ 208 i = utf8s_to_utf16s(from, len, UTF16_LITTLE_ENDIAN, 209 (wchar_t *)to, len); 210 211 /* if success terminate and exit */ 212 if (i >= 0) 213 goto success; 214 /* 215 * if fails fall back to UCS encoding as this 216 * function should not return negative values 217 * currently can fail only if source contains 218 * invalid encoded characters 219 */ 220 } 221 222 for (i = 0; len > 0 && *from; i++, from += charlen, len -= charlen) { 223 charlen = codepage->char2uni(from, len, &wchar_to); 224 if (charlen < 1) { 225 /* A question mark */ 226 wchar_to = 0x003f; 227 charlen = 1; 228 } 229 put_unaligned_le16(wchar_to, &to[i]); 230 } 231 232 success: 233 put_unaligned_le16(0, &to[i]); 234 return i; 235 } 236 237 /* 238 * smb_strndup_from_utf16() - copy a string from wire format to the local 239 * codepage 240 * @src: source string 241 * @maxlen: don't walk past this many bytes in the source string 242 * @is_unicode: is this a unicode string? 243 * @codepage: destination codepage 244 * 245 * Take a string given by the server, convert it to the local codepage and 246 * put it in a new buffer. Returns a pointer to the new string or NULL on 247 * error. 248 * 249 * Return: destination string buffer or error ptr 250 */ 251 char *smb_strndup_from_utf16(const char *src, const int maxlen, 252 const bool is_unicode, 253 const struct nls_table *codepage) 254 { 255 int len, ret; 256 char *dst; 257 258 if (is_unicode) { 259 len = smb_utf16_bytes((__le16 *)src, maxlen, codepage); 260 len += nls_nullsize(codepage); 261 dst = kmalloc(len, GFP_KERNEL); 262 if (!dst) 263 return ERR_PTR(-ENOMEM); 264 ret = smb_from_utf16(dst, (__le16 *)src, len, maxlen, codepage, 265 false); 266 if (ret < 0) { 267 kfree(dst); 268 return ERR_PTR(-EINVAL); 269 } 270 } else { 271 len = strnlen(src, maxlen); 272 len++; 273 dst = kmalloc(len, GFP_KERNEL); 274 if (!dst) 275 return ERR_PTR(-ENOMEM); 276 strscpy(dst, src, len); 277 } 278 279 return dst; 280 } 281 282 /* 283 * Convert 16 bit Unicode pathname to wire format from string in current code 284 * page. Conversion may involve remapping up the six characters that are 285 * only legal in POSIX-like OS (if they are present in the string). Path 286 * names are little endian 16 bit Unicode on the wire 287 */ 288 /* 289 * smbConvertToUTF16() - convert string from local charset to utf16 290 * @target: destination buffer 291 * @source: source buffer 292 * @srclen: source buffer size (in bytes) 293 * @cp: codepage to which characters should be converted 294 * @mapchar: should characters be remapped according to the mapchars option? 295 * 296 * Convert 16 bit Unicode pathname to wire format from string in current code 297 * page. Conversion may involve remapping up the six characters that are 298 * only legal in POSIX-like OS (if they are present in the string). Path 299 * names are little endian 16 bit Unicode on the wire 300 * 301 * Return: char length after conversion 302 */ 303 int smbConvertToUTF16(__le16 *target, const char *source, int srclen, 304 const struct nls_table *cp, int mapchars) 305 { 306 int i, j, charlen; 307 char src_char; 308 __le16 dst_char; 309 wchar_t tmp; 310 311 if (!mapchars) 312 return smb_strtoUTF16(target, source, srclen, cp); 313 314 for (i = 0, j = 0; i < srclen; j++) { 315 src_char = source[i]; 316 charlen = 1; 317 switch (src_char) { 318 case 0: 319 put_unaligned(0, &target[j]); 320 return j; 321 case ':': 322 dst_char = cpu_to_le16(UNI_COLON); 323 break; 324 case '*': 325 dst_char = cpu_to_le16(UNI_ASTERISK); 326 break; 327 case '?': 328 dst_char = cpu_to_le16(UNI_QUESTION); 329 break; 330 case '<': 331 dst_char = cpu_to_le16(UNI_LESSTHAN); 332 break; 333 case '>': 334 dst_char = cpu_to_le16(UNI_GRTRTHAN); 335 break; 336 case '|': 337 dst_char = cpu_to_le16(UNI_PIPE); 338 break; 339 /* 340 * FIXME: We can not handle remapping backslash (UNI_SLASH) 341 * until all the calls to build_path_from_dentry are modified, 342 * as they use backslash as separator. 343 */ 344 default: 345 charlen = cp->char2uni(source + i, srclen - i, &tmp); 346 dst_char = cpu_to_le16(tmp); 347 348 /* 349 * if no match, use question mark, which at least in 350 * some cases serves as wild card 351 */ 352 if (charlen < 1) { 353 dst_char = cpu_to_le16(0x003f); 354 charlen = 1; 355 } 356 } 357 /* 358 * character may take more than one byte in the source string, 359 * but will take exactly two bytes in the target string 360 */ 361 i += charlen; 362 put_unaligned(dst_char, &target[j]); 363 } 364 365 return j; 366 } 367