xref: /openbmc/linux/fs/smb/client/cifs_unicode.c (revision de548452)
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *
 *   Copyright (c) International Business Machines  Corp., 2000,2009
 *   Modified by Steve French (sfrench@us.ibm.com)
 */
7*38c8a9a5SSteve French #include <linux/fs.h>
8*38c8a9a5SSteve French #include <linux/slab.h>
9*38c8a9a5SSteve French #include "cifs_fs_sb.h"
10*38c8a9a5SSteve French #include "cifs_unicode.h"
11*38c8a9a5SSteve French #include "cifspdu.h"
12*38c8a9a5SSteve French #include "cifsglob.h"
13*38c8a9a5SSteve French #include "cifs_debug.h"
14*38c8a9a5SSteve French 
cifs_remap(struct cifs_sb_info * cifs_sb)15*38c8a9a5SSteve French int cifs_remap(struct cifs_sb_info *cifs_sb)
16*38c8a9a5SSteve French {
17*38c8a9a5SSteve French 	int map_type;
18*38c8a9a5SSteve French 
19*38c8a9a5SSteve French 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SFM_CHR)
20*38c8a9a5SSteve French 		map_type = SFM_MAP_UNI_RSVD;
21*38c8a9a5SSteve French 	else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR)
22*38c8a9a5SSteve French 		map_type = SFU_MAP_UNI_RSVD;
23*38c8a9a5SSteve French 	else
24*38c8a9a5SSteve French 		map_type = NO_MAP_UNI_RSVD;
25*38c8a9a5SSteve French 
26*38c8a9a5SSteve French 	return map_type;
27*38c8a9a5SSteve French }
28*38c8a9a5SSteve French 
29*38c8a9a5SSteve French /* Convert character using the SFU - "Services for Unix" remapping range */
30*38c8a9a5SSteve French static bool
convert_sfu_char(const __u16 src_char,char * target)31*38c8a9a5SSteve French convert_sfu_char(const __u16 src_char, char *target)
32*38c8a9a5SSteve French {
33*38c8a9a5SSteve French 	/*
34*38c8a9a5SSteve French 	 * BB: Cannot handle remapping UNI_SLASH until all the calls to
35*38c8a9a5SSteve French 	 *     build_path_from_dentry are modified, as they use slash as
36*38c8a9a5SSteve French 	 *     separator.
37*38c8a9a5SSteve French 	 */
38*38c8a9a5SSteve French 	switch (src_char) {
39*38c8a9a5SSteve French 	case UNI_COLON:
40*38c8a9a5SSteve French 		*target = ':';
41*38c8a9a5SSteve French 		break;
42*38c8a9a5SSteve French 	case UNI_ASTERISK:
43*38c8a9a5SSteve French 		*target = '*';
44*38c8a9a5SSteve French 		break;
45*38c8a9a5SSteve French 	case UNI_QUESTION:
46*38c8a9a5SSteve French 		*target = '?';
47*38c8a9a5SSteve French 		break;
48*38c8a9a5SSteve French 	case UNI_PIPE:
49*38c8a9a5SSteve French 		*target = '|';
50*38c8a9a5SSteve French 		break;
51*38c8a9a5SSteve French 	case UNI_GRTRTHAN:
52*38c8a9a5SSteve French 		*target = '>';
53*38c8a9a5SSteve French 		break;
54*38c8a9a5SSteve French 	case UNI_LESSTHAN:
55*38c8a9a5SSteve French 		*target = '<';
56*38c8a9a5SSteve French 		break;
57*38c8a9a5SSteve French 	default:
58*38c8a9a5SSteve French 		return false;
59*38c8a9a5SSteve French 	}
60*38c8a9a5SSteve French 	return true;
61*38c8a9a5SSteve French }
62*38c8a9a5SSteve French 
63*38c8a9a5SSteve French /* Convert character using the SFM - "Services for Mac" remapping range */
64*38c8a9a5SSteve French static bool
convert_sfm_char(const __u16 src_char,char * target)65*38c8a9a5SSteve French convert_sfm_char(const __u16 src_char, char *target)
66*38c8a9a5SSteve French {
67*38c8a9a5SSteve French 	if (src_char >= 0xF001 && src_char <= 0xF01F) {
68*38c8a9a5SSteve French 		*target = src_char - 0xF000;
69*38c8a9a5SSteve French 		return true;
70*38c8a9a5SSteve French 	}
71*38c8a9a5SSteve French 	switch (src_char) {
72*38c8a9a5SSteve French 	case SFM_COLON:
73*38c8a9a5SSteve French 		*target = ':';
74*38c8a9a5SSteve French 		break;
75*38c8a9a5SSteve French 	case SFM_DOUBLEQUOTE:
76*38c8a9a5SSteve French 		*target = '"';
77*38c8a9a5SSteve French 		break;
78*38c8a9a5SSteve French 	case SFM_ASTERISK:
79*38c8a9a5SSteve French 		*target = '*';
80*38c8a9a5SSteve French 		break;
81*38c8a9a5SSteve French 	case SFM_QUESTION:
82*38c8a9a5SSteve French 		*target = '?';
83*38c8a9a5SSteve French 		break;
84*38c8a9a5SSteve French 	case SFM_PIPE:
85*38c8a9a5SSteve French 		*target = '|';
86*38c8a9a5SSteve French 		break;
87*38c8a9a5SSteve French 	case SFM_GRTRTHAN:
88*38c8a9a5SSteve French 		*target = '>';
89*38c8a9a5SSteve French 		break;
90*38c8a9a5SSteve French 	case SFM_LESSTHAN:
91*38c8a9a5SSteve French 		*target = '<';
92*38c8a9a5SSteve French 		break;
93*38c8a9a5SSteve French 	case SFM_SPACE:
94*38c8a9a5SSteve French 		*target = ' ';
95*38c8a9a5SSteve French 		break;
96*38c8a9a5SSteve French 	case SFM_PERIOD:
97*38c8a9a5SSteve French 		*target = '.';
98*38c8a9a5SSteve French 		break;
99*38c8a9a5SSteve French 	default:
100*38c8a9a5SSteve French 		return false;
101*38c8a9a5SSteve French 	}
102*38c8a9a5SSteve French 	return true;
103*38c8a9a5SSteve French }
104*38c8a9a5SSteve French 
105*38c8a9a5SSteve French 
106*38c8a9a5SSteve French /*
107*38c8a9a5SSteve French  * cifs_mapchar - convert a host-endian char to proper char in codepage
108*38c8a9a5SSteve French  * @target - where converted character should be copied
109*38c8a9a5SSteve French  * @src_char - 2 byte host-endian source character
110*38c8a9a5SSteve French  * @cp - codepage to which character should be converted
111*38c8a9a5SSteve French  * @map_type - How should the 7 NTFS/SMB reserved characters be mapped to UCS2?
112*38c8a9a5SSteve French  *
113*38c8a9a5SSteve French  * This function handles the conversion of a single character. It is the
114*38c8a9a5SSteve French  * responsibility of the caller to ensure that the target buffer is large
115*38c8a9a5SSteve French  * enough to hold the result of the conversion (at least NLS_MAX_CHARSET_SIZE).
116*38c8a9a5SSteve French  */
117*38c8a9a5SSteve French static int
cifs_mapchar(char * target,const __u16 * from,const struct nls_table * cp,int maptype)118*38c8a9a5SSteve French cifs_mapchar(char *target, const __u16 *from, const struct nls_table *cp,
119*38c8a9a5SSteve French 	     int maptype)
120*38c8a9a5SSteve French {
121*38c8a9a5SSteve French 	int len = 1;
122*38c8a9a5SSteve French 	__u16 src_char;
123*38c8a9a5SSteve French 
124*38c8a9a5SSteve French 	src_char = *from;
125*38c8a9a5SSteve French 
126*38c8a9a5SSteve French 	if ((maptype == SFM_MAP_UNI_RSVD) && convert_sfm_char(src_char, target))
127*38c8a9a5SSteve French 		return len;
128*38c8a9a5SSteve French 	else if ((maptype == SFU_MAP_UNI_RSVD) &&
129*38c8a9a5SSteve French 		  convert_sfu_char(src_char, target))
130*38c8a9a5SSteve French 		return len;
131*38c8a9a5SSteve French 
132*38c8a9a5SSteve French 	/* if character not one of seven in special remap set */
133*38c8a9a5SSteve French 	len = cp->uni2char(src_char, target, NLS_MAX_CHARSET_SIZE);
134*38c8a9a5SSteve French 	if (len <= 0)
135*38c8a9a5SSteve French 		goto surrogate_pair;
136*38c8a9a5SSteve French 
137*38c8a9a5SSteve French 	return len;
138*38c8a9a5SSteve French 
139*38c8a9a5SSteve French surrogate_pair:
140*38c8a9a5SSteve French 	/* convert SURROGATE_PAIR and IVS */
141*38c8a9a5SSteve French 	if (strcmp(cp->charset, "utf8"))
142*38c8a9a5SSteve French 		goto unknown;
143*38c8a9a5SSteve French 	len = utf16s_to_utf8s(from, 3, UTF16_LITTLE_ENDIAN, target, 6);
144*38c8a9a5SSteve French 	if (len <= 0)
145*38c8a9a5SSteve French 		goto unknown;
146*38c8a9a5SSteve French 	return len;
147*38c8a9a5SSteve French 
148*38c8a9a5SSteve French unknown:
149*38c8a9a5SSteve French 	*target = '?';
150*38c8a9a5SSteve French 	len = 1;
151*38c8a9a5SSteve French 	return len;
152*38c8a9a5SSteve French }
153*38c8a9a5SSteve French 
154*38c8a9a5SSteve French /*
155*38c8a9a5SSteve French  * cifs_from_utf16 - convert utf16le string to local charset
156*38c8a9a5SSteve French  * @to - destination buffer
157*38c8a9a5SSteve French  * @from - source buffer
158*38c8a9a5SSteve French  * @tolen - destination buffer size (in bytes)
159*38c8a9a5SSteve French  * @fromlen - source buffer size (in bytes)
160*38c8a9a5SSteve French  * @codepage - codepage to which characters should be converted
161*38c8a9a5SSteve French  * @mapchar - should characters be remapped according to the mapchars option?
162*38c8a9a5SSteve French  *
163*38c8a9a5SSteve French  * Convert a little-endian utf16le string (as sent by the server) to a string
164*38c8a9a5SSteve French  * in the provided codepage. The tolen and fromlen parameters are to ensure
165*38c8a9a5SSteve French  * that the code doesn't walk off of the end of the buffer (which is always
166*38c8a9a5SSteve French  * a danger if the alignment of the source buffer is off). The destination
167*38c8a9a5SSteve French  * string is always properly null terminated and fits in the destination
168*38c8a9a5SSteve French  * buffer. Returns the length of the destination string in bytes (including
169*38c8a9a5SSteve French  * null terminator).
170*38c8a9a5SSteve French  *
171*38c8a9a5SSteve French  * Note that some windows versions actually send multiword UTF-16 characters
172*38c8a9a5SSteve French  * instead of straight UTF16-2. The linux nls routines however aren't able to
173*38c8a9a5SSteve French  * deal with those characters properly. In the event that we get some of
174*38c8a9a5SSteve French  * those characters, they won't be translated properly.
175*38c8a9a5SSteve French  */
176*38c8a9a5SSteve French int
cifs_from_utf16(char * to,const __le16 * from,int tolen,int fromlen,const struct nls_table * codepage,int map_type)177*38c8a9a5SSteve French cifs_from_utf16(char *to, const __le16 *from, int tolen, int fromlen,
178*38c8a9a5SSteve French 		const struct nls_table *codepage, int map_type)
179*38c8a9a5SSteve French {
180*38c8a9a5SSteve French 	int i, charlen, safelen;
181*38c8a9a5SSteve French 	int outlen = 0;
182*38c8a9a5SSteve French 	int nullsize = nls_nullsize(codepage);
183*38c8a9a5SSteve French 	int fromwords = fromlen / 2;
184*38c8a9a5SSteve French 	char tmp[NLS_MAX_CHARSET_SIZE];
185*38c8a9a5SSteve French 	__u16 ftmp[3];		/* ftmp[3] = 3array x 2bytes = 6bytes UTF-16 */
186*38c8a9a5SSteve French 
187*38c8a9a5SSteve French 	/*
188*38c8a9a5SSteve French 	 * because the chars can be of varying widths, we need to take care
189*38c8a9a5SSteve French 	 * not to overflow the destination buffer when we get close to the
190*38c8a9a5SSteve French 	 * end of it. Until we get to this offset, we don't need to check
191*38c8a9a5SSteve French 	 * for overflow however.
192*38c8a9a5SSteve French 	 */
193*38c8a9a5SSteve French 	safelen = tolen - (NLS_MAX_CHARSET_SIZE + nullsize);
194*38c8a9a5SSteve French 
195*38c8a9a5SSteve French 	for (i = 0; i < fromwords; i++) {
196*38c8a9a5SSteve French 		ftmp[0] = get_unaligned_le16(&from[i]);
197*38c8a9a5SSteve French 		if (ftmp[0] == 0)
198*38c8a9a5SSteve French 			break;
199*38c8a9a5SSteve French 		if (i + 1 < fromwords)
200*38c8a9a5SSteve French 			ftmp[1] = get_unaligned_le16(&from[i + 1]);
201*38c8a9a5SSteve French 		else
202*38c8a9a5SSteve French 			ftmp[1] = 0;
203*38c8a9a5SSteve French 		if (i + 2 < fromwords)
204*38c8a9a5SSteve French 			ftmp[2] = get_unaligned_le16(&from[i + 2]);
205*38c8a9a5SSteve French 		else
206*38c8a9a5SSteve French 			ftmp[2] = 0;
207*38c8a9a5SSteve French 
208*38c8a9a5SSteve French 		/*
209*38c8a9a5SSteve French 		 * check to see if converting this character might make the
210*38c8a9a5SSteve French 		 * conversion bleed into the null terminator
211*38c8a9a5SSteve French 		 */
212*38c8a9a5SSteve French 		if (outlen >= safelen) {
213*38c8a9a5SSteve French 			charlen = cifs_mapchar(tmp, ftmp, codepage, map_type);
214*38c8a9a5SSteve French 			if ((outlen + charlen) > (tolen - nullsize))
215*38c8a9a5SSteve French 				break;
216*38c8a9a5SSteve French 		}
217*38c8a9a5SSteve French 
218*38c8a9a5SSteve French 		/* put converted char into 'to' buffer */
219*38c8a9a5SSteve French 		charlen = cifs_mapchar(&to[outlen], ftmp, codepage, map_type);
220*38c8a9a5SSteve French 		outlen += charlen;
221*38c8a9a5SSteve French 
222*38c8a9a5SSteve French 		/* charlen (=bytes of UTF-8 for 1 character)
223*38c8a9a5SSteve French 		 * 4bytes UTF-8(surrogate pair) is charlen=4
224*38c8a9a5SSteve French 		 *   (4bytes UTF-16 code)
225*38c8a9a5SSteve French 		 * 7-8bytes UTF-8(IVS) is charlen=3+4 or 4+4
226*38c8a9a5SSteve French 		 *   (2 UTF-8 pairs divided to 2 UTF-16 pairs) */
227*38c8a9a5SSteve French 		if (charlen == 4)
228*38c8a9a5SSteve French 			i++;
229*38c8a9a5SSteve French 		else if (charlen >= 5)
230*38c8a9a5SSteve French 			/* 5-6bytes UTF-8 */
231*38c8a9a5SSteve French 			i += 2;
232*38c8a9a5SSteve French 	}
233*38c8a9a5SSteve French 
234*38c8a9a5SSteve French 	/* properly null-terminate string */
235*38c8a9a5SSteve French 	for (i = 0; i < nullsize; i++)
236*38c8a9a5SSteve French 		to[outlen++] = 0;
237*38c8a9a5SSteve French 
238*38c8a9a5SSteve French 	return outlen;
239*38c8a9a5SSteve French }
240*38c8a9a5SSteve French 
241*38c8a9a5SSteve French /*
242*38c8a9a5SSteve French  * NAME:	cifs_strtoUTF16()
243*38c8a9a5SSteve French  *
244*38c8a9a5SSteve French  * FUNCTION:	Convert character string to unicode string
245*38c8a9a5SSteve French  *
246*38c8a9a5SSteve French  */
247*38c8a9a5SSteve French int
cifs_strtoUTF16(__le16 * to,const char * from,int len,const struct nls_table * codepage)248*38c8a9a5SSteve French cifs_strtoUTF16(__le16 *to, const char *from, int len,
249*38c8a9a5SSteve French 	      const struct nls_table *codepage)
250*38c8a9a5SSteve French {
251*38c8a9a5SSteve French 	int charlen;
252*38c8a9a5SSteve French 	int i;
253*38c8a9a5SSteve French 	wchar_t wchar_to; /* needed to quiet sparse */
254*38c8a9a5SSteve French 
255*38c8a9a5SSteve French 	/* special case for utf8 to handle no plane0 chars */
256*38c8a9a5SSteve French 	if (!strcmp(codepage->charset, "utf8")) {
257*38c8a9a5SSteve French 		/*
258*38c8a9a5SSteve French 		 * convert utf8 -> utf16, we assume we have enough space
259*38c8a9a5SSteve French 		 * as caller should have assumed conversion does not overflow
260*38c8a9a5SSteve French 		 * in destination len is length in wchar_t units (16bits)
261*38c8a9a5SSteve French 		 */
262*38c8a9a5SSteve French 		i  = utf8s_to_utf16s(from, len, UTF16_LITTLE_ENDIAN,
263*38c8a9a5SSteve French 				       (wchar_t *) to, len);
264*38c8a9a5SSteve French 
265*38c8a9a5SSteve French 		/* if success terminate and exit */
266*38c8a9a5SSteve French 		if (i >= 0)
267*38c8a9a5SSteve French 			goto success;
268*38c8a9a5SSteve French 		/*
269*38c8a9a5SSteve French 		 * if fails fall back to UCS encoding as this
270*38c8a9a5SSteve French 		 * function should not return negative values
271*38c8a9a5SSteve French 		 * currently can fail only if source contains
272*38c8a9a5SSteve French 		 * invalid encoded characters
273*38c8a9a5SSteve French 		 */
274*38c8a9a5SSteve French 	}
275*38c8a9a5SSteve French 
276*38c8a9a5SSteve French 	for (i = 0; len && *from; i++, from += charlen, len -= charlen) {
277*38c8a9a5SSteve French 		charlen = codepage->char2uni(from, len, &wchar_to);
278*38c8a9a5SSteve French 		if (charlen < 1) {
279*38c8a9a5SSteve French 			cifs_dbg(VFS, "strtoUTF16: char2uni of 0x%x returned %d\n",
280*38c8a9a5SSteve French 				 *from, charlen);
281*38c8a9a5SSteve French 			/* A question mark */
282*38c8a9a5SSteve French 			wchar_to = 0x003f;
283*38c8a9a5SSteve French 			charlen = 1;
284*38c8a9a5SSteve French 		}
285*38c8a9a5SSteve French 		put_unaligned_le16(wchar_to, &to[i]);
286*38c8a9a5SSteve French 	}
287*38c8a9a5SSteve French 
288*38c8a9a5SSteve French success:
289*38c8a9a5SSteve French 	put_unaligned_le16(0, &to[i]);
290*38c8a9a5SSteve French 	return i;
291*38c8a9a5SSteve French }
292*38c8a9a5SSteve French 
293*38c8a9a5SSteve French /*
294*38c8a9a5SSteve French  * cifs_utf16_bytes - how long will a string be after conversion?
295*38c8a9a5SSteve French  * @utf16 - pointer to input string
296*38c8a9a5SSteve French  * @maxbytes - don't go past this many bytes of input string
297*38c8a9a5SSteve French  * @codepage - destination codepage
298*38c8a9a5SSteve French  *
299*38c8a9a5SSteve French  * Walk a utf16le string and return the number of bytes that the string will
300*38c8a9a5SSteve French  * be after being converted to the given charset, not including any null
301*38c8a9a5SSteve French  * termination required. Don't walk past maxbytes in the source buffer.
302*38c8a9a5SSteve French  */
303*38c8a9a5SSteve French int
cifs_utf16_bytes(const __le16 * from,int maxbytes,const struct nls_table * codepage)304*38c8a9a5SSteve French cifs_utf16_bytes(const __le16 *from, int maxbytes,
305*38c8a9a5SSteve French 		const struct nls_table *codepage)
306*38c8a9a5SSteve French {
307*38c8a9a5SSteve French 	int i;
308*38c8a9a5SSteve French 	int charlen, outlen = 0;
309*38c8a9a5SSteve French 	int maxwords = maxbytes / 2;
310*38c8a9a5SSteve French 	char tmp[NLS_MAX_CHARSET_SIZE];
311*38c8a9a5SSteve French 	__u16 ftmp[3];
312*38c8a9a5SSteve French 
313*38c8a9a5SSteve French 	for (i = 0; i < maxwords; i++) {
314*38c8a9a5SSteve French 		ftmp[0] = get_unaligned_le16(&from[i]);
315*38c8a9a5SSteve French 		if (ftmp[0] == 0)
316*38c8a9a5SSteve French 			break;
317*38c8a9a5SSteve French 		if (i + 1 < maxwords)
318*38c8a9a5SSteve French 			ftmp[1] = get_unaligned_le16(&from[i + 1]);
319*38c8a9a5SSteve French 		else
320*38c8a9a5SSteve French 			ftmp[1] = 0;
321*38c8a9a5SSteve French 		if (i + 2 < maxwords)
322*38c8a9a5SSteve French 			ftmp[2] = get_unaligned_le16(&from[i + 2]);
323*38c8a9a5SSteve French 		else
324*38c8a9a5SSteve French 			ftmp[2] = 0;
325*38c8a9a5SSteve French 
326*38c8a9a5SSteve French 		charlen = cifs_mapchar(tmp, ftmp, codepage, NO_MAP_UNI_RSVD);
327*38c8a9a5SSteve French 		outlen += charlen;
328*38c8a9a5SSteve French 	}
329*38c8a9a5SSteve French 
330*38c8a9a5SSteve French 	return outlen;
331*38c8a9a5SSteve French }
332*38c8a9a5SSteve French 
333*38c8a9a5SSteve French /*
334*38c8a9a5SSteve French  * cifs_strndup_from_utf16 - copy a string from wire format to the local
335*38c8a9a5SSteve French  * codepage
336*38c8a9a5SSteve French  * @src - source string
337*38c8a9a5SSteve French  * @maxlen - don't walk past this many bytes in the source string
338*38c8a9a5SSteve French  * @is_unicode - is this a unicode string?
339*38c8a9a5SSteve French  * @codepage - destination codepage
340*38c8a9a5SSteve French  *
341*38c8a9a5SSteve French  * Take a string given by the server, convert it to the local codepage and
342*38c8a9a5SSteve French  * put it in a new buffer. Returns a pointer to the new string or NULL on
343*38c8a9a5SSteve French  * error.
344*38c8a9a5SSteve French  */
345*38c8a9a5SSteve French char *
cifs_strndup_from_utf16(const char * src,const int maxlen,const bool is_unicode,const struct nls_table * codepage)346*38c8a9a5SSteve French cifs_strndup_from_utf16(const char *src, const int maxlen,
347*38c8a9a5SSteve French 			const bool is_unicode, const struct nls_table *codepage)
348*38c8a9a5SSteve French {
349*38c8a9a5SSteve French 	int len;
350*38c8a9a5SSteve French 	char *dst;
351*38c8a9a5SSteve French 
352*38c8a9a5SSteve French 	if (is_unicode) {
353*38c8a9a5SSteve French 		len = cifs_utf16_bytes((__le16 *) src, maxlen, codepage);
354*38c8a9a5SSteve French 		len += nls_nullsize(codepage);
355*38c8a9a5SSteve French 		dst = kmalloc(len, GFP_KERNEL);
356*38c8a9a5SSteve French 		if (!dst)
357*38c8a9a5SSteve French 			return NULL;
358*38c8a9a5SSteve French 		cifs_from_utf16(dst, (__le16 *) src, len, maxlen, codepage,
359*38c8a9a5SSteve French 				NO_MAP_UNI_RSVD);
360*38c8a9a5SSteve French 	} else {
361*38c8a9a5SSteve French 		dst = kstrndup(src, maxlen, GFP_KERNEL);
362*38c8a9a5SSteve French 	}
363*38c8a9a5SSteve French 
364*38c8a9a5SSteve French 	return dst;
365*38c8a9a5SSteve French }
366*38c8a9a5SSteve French 
convert_to_sfu_char(char src_char)367*38c8a9a5SSteve French static __le16 convert_to_sfu_char(char src_char)
368*38c8a9a5SSteve French {
369*38c8a9a5SSteve French 	__le16 dest_char;
370*38c8a9a5SSteve French 
371*38c8a9a5SSteve French 	switch (src_char) {
372*38c8a9a5SSteve French 	case ':':
373*38c8a9a5SSteve French 		dest_char = cpu_to_le16(UNI_COLON);
374*38c8a9a5SSteve French 		break;
375*38c8a9a5SSteve French 	case '*':
376*38c8a9a5SSteve French 		dest_char = cpu_to_le16(UNI_ASTERISK);
377*38c8a9a5SSteve French 		break;
378*38c8a9a5SSteve French 	case '?':
379*38c8a9a5SSteve French 		dest_char = cpu_to_le16(UNI_QUESTION);
380*38c8a9a5SSteve French 		break;
381*38c8a9a5SSteve French 	case '<':
382*38c8a9a5SSteve French 		dest_char = cpu_to_le16(UNI_LESSTHAN);
383*38c8a9a5SSteve French 		break;
384*38c8a9a5SSteve French 	case '>':
385*38c8a9a5SSteve French 		dest_char = cpu_to_le16(UNI_GRTRTHAN);
386*38c8a9a5SSteve French 		break;
387*38c8a9a5SSteve French 	case '|':
388*38c8a9a5SSteve French 		dest_char = cpu_to_le16(UNI_PIPE);
389*38c8a9a5SSteve French 		break;
390*38c8a9a5SSteve French 	default:
391*38c8a9a5SSteve French 		dest_char = 0;
392*38c8a9a5SSteve French 	}
393*38c8a9a5SSteve French 
394*38c8a9a5SSteve French 	return dest_char;
395*38c8a9a5SSteve French }
396*38c8a9a5SSteve French 
convert_to_sfm_char(char src_char,bool end_of_string)397*38c8a9a5SSteve French static __le16 convert_to_sfm_char(char src_char, bool end_of_string)
398*38c8a9a5SSteve French {
399*38c8a9a5SSteve French 	__le16 dest_char;
400*38c8a9a5SSteve French 
401*38c8a9a5SSteve French 	if (src_char >= 0x01 && src_char <= 0x1F) {
402*38c8a9a5SSteve French 		dest_char = cpu_to_le16(src_char + 0xF000);
403*38c8a9a5SSteve French 		return dest_char;
404*38c8a9a5SSteve French 	}
405*38c8a9a5SSteve French 	switch (src_char) {
406*38c8a9a5SSteve French 	case ':':
407*38c8a9a5SSteve French 		dest_char = cpu_to_le16(SFM_COLON);
408*38c8a9a5SSteve French 		break;
409*38c8a9a5SSteve French 	case '"':
410*38c8a9a5SSteve French 		dest_char = cpu_to_le16(SFM_DOUBLEQUOTE);
411*38c8a9a5SSteve French 		break;
412*38c8a9a5SSteve French 	case '*':
413*38c8a9a5SSteve French 		dest_char = cpu_to_le16(SFM_ASTERISK);
414*38c8a9a5SSteve French 		break;
415*38c8a9a5SSteve French 	case '?':
416*38c8a9a5SSteve French 		dest_char = cpu_to_le16(SFM_QUESTION);
417*38c8a9a5SSteve French 		break;
418*38c8a9a5SSteve French 	case '<':
419*38c8a9a5SSteve French 		dest_char = cpu_to_le16(SFM_LESSTHAN);
420*38c8a9a5SSteve French 		break;
421*38c8a9a5SSteve French 	case '>':
422*38c8a9a5SSteve French 		dest_char = cpu_to_le16(SFM_GRTRTHAN);
423*38c8a9a5SSteve French 		break;
424*38c8a9a5SSteve French 	case '|':
425*38c8a9a5SSteve French 		dest_char = cpu_to_le16(SFM_PIPE);
426*38c8a9a5SSteve French 		break;
427*38c8a9a5SSteve French 	case '.':
428*38c8a9a5SSteve French 		if (end_of_string)
429*38c8a9a5SSteve French 			dest_char = cpu_to_le16(SFM_PERIOD);
430*38c8a9a5SSteve French 		else
431*38c8a9a5SSteve French 			dest_char = 0;
432*38c8a9a5SSteve French 		break;
433*38c8a9a5SSteve French 	case ' ':
434*38c8a9a5SSteve French 		if (end_of_string)
435*38c8a9a5SSteve French 			dest_char = cpu_to_le16(SFM_SPACE);
436*38c8a9a5SSteve French 		else
437*38c8a9a5SSteve French 			dest_char = 0;
438*38c8a9a5SSteve French 		break;
439*38c8a9a5SSteve French 	default:
440*38c8a9a5SSteve French 		dest_char = 0;
441*38c8a9a5SSteve French 	}
442*38c8a9a5SSteve French 
443*38c8a9a5SSteve French 	return dest_char;
444*38c8a9a5SSteve French }
445*38c8a9a5SSteve French 
446*38c8a9a5SSteve French /*
447*38c8a9a5SSteve French  * Convert 16 bit Unicode pathname to wire format from string in current code
448*38c8a9a5SSteve French  * page. Conversion may involve remapping up the six characters that are
449*38c8a9a5SSteve French  * only legal in POSIX-like OS (if they are present in the string). Path
450*38c8a9a5SSteve French  * names are little endian 16 bit Unicode on the wire
451*38c8a9a5SSteve French  */
452*38c8a9a5SSteve French int
cifsConvertToUTF16(__le16 * target,const char * source,int srclen,const struct nls_table * cp,int map_chars)453*38c8a9a5SSteve French cifsConvertToUTF16(__le16 *target, const char *source, int srclen,
454*38c8a9a5SSteve French 		 const struct nls_table *cp, int map_chars)
455*38c8a9a5SSteve French {
456*38c8a9a5SSteve French 	int i, charlen;
457*38c8a9a5SSteve French 	int j = 0;
458*38c8a9a5SSteve French 	char src_char;
459*38c8a9a5SSteve French 	__le16 dst_char;
460*38c8a9a5SSteve French 	wchar_t tmp;
461*38c8a9a5SSteve French 	wchar_t *wchar_to;	/* UTF-16 */
462*38c8a9a5SSteve French 	int ret;
463*38c8a9a5SSteve French 	unicode_t u;
464*38c8a9a5SSteve French 
465*38c8a9a5SSteve French 	if (map_chars == NO_MAP_UNI_RSVD)
466*38c8a9a5SSteve French 		return cifs_strtoUTF16(target, source, PATH_MAX, cp);
467*38c8a9a5SSteve French 
468*38c8a9a5SSteve French 	wchar_to = kzalloc(6, GFP_KERNEL);
469*38c8a9a5SSteve French 
470*38c8a9a5SSteve French 	for (i = 0; i < srclen; j++) {
471*38c8a9a5SSteve French 		src_char = source[i];
472*38c8a9a5SSteve French 		charlen = 1;
473*38c8a9a5SSteve French 
474*38c8a9a5SSteve French 		/* check if end of string */
475*38c8a9a5SSteve French 		if (src_char == 0)
476*38c8a9a5SSteve French 			goto ctoUTF16_out;
477*38c8a9a5SSteve French 
478*38c8a9a5SSteve French 		/* see if we must remap this char */
479*38c8a9a5SSteve French 		if (map_chars == SFU_MAP_UNI_RSVD)
480*38c8a9a5SSteve French 			dst_char = convert_to_sfu_char(src_char);
481*38c8a9a5SSteve French 		else if (map_chars == SFM_MAP_UNI_RSVD) {
482*38c8a9a5SSteve French 			bool end_of_string;
483*38c8a9a5SSteve French 
484*38c8a9a5SSteve French 			/**
485*38c8a9a5SSteve French 			 * Remap spaces and periods found at the end of every
486*38c8a9a5SSteve French 			 * component of the path. The special cases of '.' and
487*38c8a9a5SSteve French 			 * '..' do not need to be dealt with explicitly because
488*38c8a9a5SSteve French 			 * they are addressed in namei.c:link_path_walk().
489*38c8a9a5SSteve French 			 **/
490*38c8a9a5SSteve French 			if ((i == srclen - 1) || (source[i+1] == '\\'))
491*38c8a9a5SSteve French 				end_of_string = true;
492*38c8a9a5SSteve French 			else
493*38c8a9a5SSteve French 				end_of_string = false;
494*38c8a9a5SSteve French 
495*38c8a9a5SSteve French 			dst_char = convert_to_sfm_char(src_char, end_of_string);
496*38c8a9a5SSteve French 		} else
497*38c8a9a5SSteve French 			dst_char = 0;
498*38c8a9a5SSteve French 		/*
499*38c8a9a5SSteve French 		 * FIXME: We can not handle remapping backslash (UNI_SLASH)
500*38c8a9a5SSteve French 		 * until all the calls to build_path_from_dentry are modified,
501*38c8a9a5SSteve French 		 * as they use backslash as separator.
502*38c8a9a5SSteve French 		 */
503*38c8a9a5SSteve French 		if (dst_char == 0) {
504*38c8a9a5SSteve French 			charlen = cp->char2uni(source + i, srclen - i, &tmp);
505*38c8a9a5SSteve French 			dst_char = cpu_to_le16(tmp);
506*38c8a9a5SSteve French 
507*38c8a9a5SSteve French 			/*
508*38c8a9a5SSteve French 			 * if no match, use question mark, which at least in
509*38c8a9a5SSteve French 			 * some cases serves as wild card
510*38c8a9a5SSteve French 			 */
511*38c8a9a5SSteve French 			if (charlen > 0)
512*38c8a9a5SSteve French 				goto ctoUTF16;
513*38c8a9a5SSteve French 
514*38c8a9a5SSteve French 			/* convert SURROGATE_PAIR */
515*38c8a9a5SSteve French 			if (strcmp(cp->charset, "utf8") || !wchar_to)
516*38c8a9a5SSteve French 				goto unknown;
517*38c8a9a5SSteve French 			if (*(source + i) & 0x80) {
518*38c8a9a5SSteve French 				charlen = utf8_to_utf32(source + i, 6, &u);
519*38c8a9a5SSteve French 				if (charlen < 0)
520*38c8a9a5SSteve French 					goto unknown;
521*38c8a9a5SSteve French 			} else
522*38c8a9a5SSteve French 				goto unknown;
523*38c8a9a5SSteve French 			ret  = utf8s_to_utf16s(source + i, charlen,
524*38c8a9a5SSteve French 					       UTF16_LITTLE_ENDIAN,
525*38c8a9a5SSteve French 					       wchar_to, 6);
526*38c8a9a5SSteve French 			if (ret < 0)
527*38c8a9a5SSteve French 				goto unknown;
528*38c8a9a5SSteve French 
529*38c8a9a5SSteve French 			i += charlen;
530*38c8a9a5SSteve French 			dst_char = cpu_to_le16(*wchar_to);
531*38c8a9a5SSteve French 			if (charlen <= 3)
532*38c8a9a5SSteve French 				/* 1-3bytes UTF-8 to 2bytes UTF-16 */
533*38c8a9a5SSteve French 				put_unaligned(dst_char, &target[j]);
534*38c8a9a5SSteve French 			else if (charlen == 4) {
535*38c8a9a5SSteve French 				/* 4bytes UTF-8(surrogate pair) to 4bytes UTF-16
536*38c8a9a5SSteve French 				 * 7-8bytes UTF-8(IVS) divided to 2 UTF-16
537*38c8a9a5SSteve French 				 *   (charlen=3+4 or 4+4) */
538*38c8a9a5SSteve French 				put_unaligned(dst_char, &target[j]);
539*38c8a9a5SSteve French 				dst_char = cpu_to_le16(*(wchar_to + 1));
540*38c8a9a5SSteve French 				j++;
541*38c8a9a5SSteve French 				put_unaligned(dst_char, &target[j]);
542*38c8a9a5SSteve French 			} else if (charlen >= 5) {
543*38c8a9a5SSteve French 				/* 5-6bytes UTF-8 to 6bytes UTF-16 */
544*38c8a9a5SSteve French 				put_unaligned(dst_char, &target[j]);
545*38c8a9a5SSteve French 				dst_char = cpu_to_le16(*(wchar_to + 1));
546*38c8a9a5SSteve French 				j++;
547*38c8a9a5SSteve French 				put_unaligned(dst_char, &target[j]);
548*38c8a9a5SSteve French 				dst_char = cpu_to_le16(*(wchar_to + 2));
549*38c8a9a5SSteve French 				j++;
550*38c8a9a5SSteve French 				put_unaligned(dst_char, &target[j]);
551*38c8a9a5SSteve French 			}
552*38c8a9a5SSteve French 			continue;
553*38c8a9a5SSteve French 
554*38c8a9a5SSteve French unknown:
555*38c8a9a5SSteve French 			dst_char = cpu_to_le16(0x003f);
556*38c8a9a5SSteve French 			charlen = 1;
557*38c8a9a5SSteve French 		}
558*38c8a9a5SSteve French 
559*38c8a9a5SSteve French ctoUTF16:
560*38c8a9a5SSteve French 		/*
561*38c8a9a5SSteve French 		 * character may take more than one byte in the source string,
562*38c8a9a5SSteve French 		 * but will take exactly two bytes in the target string
563*38c8a9a5SSteve French 		 */
564*38c8a9a5SSteve French 		i += charlen;
565*38c8a9a5SSteve French 		put_unaligned(dst_char, &target[j]);
566*38c8a9a5SSteve French 	}
567*38c8a9a5SSteve French 
568*38c8a9a5SSteve French ctoUTF16_out:
569*38c8a9a5SSteve French 	put_unaligned(0, &target[j]); /* Null terminate target unicode string */
570*38c8a9a5SSteve French 	kfree(wchar_to);
571*38c8a9a5SSteve French 	return j;
572*38c8a9a5SSteve French }
573*38c8a9a5SSteve French 
574*38c8a9a5SSteve French /*
575*38c8a9a5SSteve French  * cifs_local_to_utf16_bytes - how long will a string be after conversion?
576*38c8a9a5SSteve French  * @from - pointer to input string
577*38c8a9a5SSteve French  * @maxbytes - don't go past this many bytes of input string
578*38c8a9a5SSteve French  * @codepage - source codepage
579*38c8a9a5SSteve French  *
580*38c8a9a5SSteve French  * Walk a string and return the number of bytes that the string will
581*38c8a9a5SSteve French  * be after being converted to the given charset, not including any null
582*38c8a9a5SSteve French  * termination required. Don't walk past maxbytes in the source buffer.
583*38c8a9a5SSteve French  */
584*38c8a9a5SSteve French 
585*38c8a9a5SSteve French static int
cifs_local_to_utf16_bytes(const char * from,int len,const struct nls_table * codepage)586*38c8a9a5SSteve French cifs_local_to_utf16_bytes(const char *from, int len,
587*38c8a9a5SSteve French 			  const struct nls_table *codepage)
588*38c8a9a5SSteve French {
589*38c8a9a5SSteve French 	int charlen;
590*38c8a9a5SSteve French 	int i;
591*38c8a9a5SSteve French 	wchar_t wchar_to;
592*38c8a9a5SSteve French 
593*38c8a9a5SSteve French 	for (i = 0; len && *from; i++, from += charlen, len -= charlen) {
594*38c8a9a5SSteve French 		charlen = codepage->char2uni(from, len, &wchar_to);
595*38c8a9a5SSteve French 		/* Failed conversion defaults to a question mark */
596*38c8a9a5SSteve French 		if (charlen < 1)
597*38c8a9a5SSteve French 			charlen = 1;
598*38c8a9a5SSteve French 	}
599*38c8a9a5SSteve French 	return 2 * i; /* UTF16 characters are two bytes */
600*38c8a9a5SSteve French }
601*38c8a9a5SSteve French 
602*38c8a9a5SSteve French /*
603*38c8a9a5SSteve French  * cifs_strndup_to_utf16 - copy a string to wire format from the local codepage
604*38c8a9a5SSteve French  * @src - source string
605*38c8a9a5SSteve French  * @maxlen - don't walk past this many bytes in the source string
606*38c8a9a5SSteve French  * @utf16_len - the length of the allocated string in bytes (including null)
607*38c8a9a5SSteve French  * @cp - source codepage
608*38c8a9a5SSteve French  * @remap - map special chars
609*38c8a9a5SSteve French  *
610*38c8a9a5SSteve French  * Take a string convert it from the local codepage to UTF16 and
611*38c8a9a5SSteve French  * put it in a new buffer. Returns a pointer to the new string or NULL on
612*38c8a9a5SSteve French  * error.
613*38c8a9a5SSteve French  */
614*38c8a9a5SSteve French __le16 *
cifs_strndup_to_utf16(const char * src,const int maxlen,int * utf16_len,const struct nls_table * cp,int remap)615*38c8a9a5SSteve French cifs_strndup_to_utf16(const char *src, const int maxlen, int *utf16_len,
616*38c8a9a5SSteve French 		      const struct nls_table *cp, int remap)
617*38c8a9a5SSteve French {
618*38c8a9a5SSteve French 	int len;
619*38c8a9a5SSteve French 	__le16 *dst;
620*38c8a9a5SSteve French 
621*38c8a9a5SSteve French 	len = cifs_local_to_utf16_bytes(src, maxlen, cp);
622*38c8a9a5SSteve French 	len += 2; /* NULL */
623*38c8a9a5SSteve French 	dst = kmalloc(len, GFP_KERNEL);
624*38c8a9a5SSteve French 	if (!dst) {
625*38c8a9a5SSteve French 		*utf16_len = 0;
626*38c8a9a5SSteve French 		return NULL;
627*38c8a9a5SSteve French 	}
628*38c8a9a5SSteve French 	cifsConvertToUTF16(dst, src, strlen(src), cp, remap);
629*38c8a9a5SSteve French 	*utf16_len = len;
630*38c8a9a5SSteve French 	return dst;
631*38c8a9a5SSteve French }
632