xref: /openbmc/linux/fs/udf/unicode.c (revision 2612e3bbc0386368a850140a6c9b990cd496a5ec)
15ce34554SBagas Sanjaya // SPDX-License-Identifier: GPL-2.0-only
21da177e4SLinus Torvalds /*
31da177e4SLinus Torvalds  * unicode.c
41da177e4SLinus Torvalds  *
51da177e4SLinus Torvalds  * PURPOSE
61da177e4SLinus Torvalds  *	Routines for converting between UTF-8 and OSTA Compressed Unicode.
71da177e4SLinus Torvalds  *      Also handles filename mangling
81da177e4SLinus Torvalds  *
91da177e4SLinus Torvalds  * DESCRIPTION
101da177e4SLinus Torvalds  *	OSTA Compressed Unicode is explained in the OSTA UDF specification.
111da177e4SLinus Torvalds  *		http://www.osta.org/
 *	UTF-8 is explained in IETF RFC 3629.
 *		https://www.rfc-editor.org/rfc/rfc3629
141da177e4SLinus Torvalds  *
151da177e4SLinus Torvalds  */
161da177e4SLinus Torvalds 
171da177e4SLinus Torvalds #include "udfdecl.h"
181da177e4SLinus Torvalds 
191da177e4SLinus Torvalds #include <linux/kernel.h>
201da177e4SLinus Torvalds #include <linux/string.h>	/* for memset */
211da177e4SLinus Torvalds #include <linux/nls.h>
22f845fcedSBob Copeland #include <linux/crc-itu-t.h>
235a0e3ad6STejun Heo #include <linux/slab.h>
241da177e4SLinus Torvalds 
251da177e4SLinus Torvalds #include "udf_sb.h"
261da177e4SLinus Torvalds 
27ef2e18f1SJan Kara #define PLANE_SIZE 0x10000
28b8a41c44SJan Kara #define UNICODE_MAX 0x10ffff
2944f06ba8SJan Kara #define SURROGATE_MASK 0xfffff800
3044f06ba8SJan Kara #define SURROGATE_PAIR 0x0000d800
31ef2e18f1SJan Kara #define SURROGATE_LOW  0x00000400
32ef2e18f1SJan Kara #define SURROGATE_CHAR_BITS 10
33ef2e18f1SJan Kara #define SURROGATE_CHAR_MASK ((1 << SURROGATE_CHAR_BITS) - 1)
3444f06ba8SJan Kara 
35484a10f4SAndrew Gabbasov #define ILLEGAL_CHAR_MARK	'_'
36484a10f4SAndrew Gabbasov #define EXT_MARK		'.'
37484a10f4SAndrew Gabbasov #define CRC_MARK		'#'
38484a10f4SAndrew Gabbasov #define EXT_SIZE		5
39484a10f4SAndrew Gabbasov /* Number of chars we need to store generated CRC to make filename unique */
40484a10f4SAndrew Gabbasov #define CRC_LEN			5
41484a10f4SAndrew Gabbasov 
/*
 * Decode one character from an OSTA compressed Unicode byte stream.
 *
 * Reads one unit of @u_ch bytes (big-endian when @u_ch > 1) starting at
 * @str_i[@str_i_idx]. When the unit is a UTF-16 high surrogate, the next two
 * bytes are consumed as well and the pair is merged into a single code point.
 * Malformed input (a high surrogate at the very end of the buffer, a lone low
 * surrogate, or a high surrogate not followed by a low one) is reported as
 * UNICODE_MAX + 1 so that callers can recognise it as unconvertible.
 *
 * The decoded value is stored in *@ret. The return value is the number of
 * input bytes consumed.
 */
static unicode_t get_utf16_char(const uint8_t *str_i, int str_i_max_len,
				int str_i_idx, int u_ch, unicode_t *ret)
{
	unicode_t code;
	unicode_t trail;
	int pos = str_i_idx;

	/* Expand OSTA compressed Unicode to Unicode */
	code = str_i[pos++];
	if (u_ch > 1)
		code = (code << 8) | str_i[pos++];

	/* Anything outside the surrogate range is already a full code point */
	if ((code & SURROGATE_MASK) != SURROGATE_PAIR)
		goto done;

	/*
	 * A surrogate with nothing after it, or a low surrogate appearing
	 * first, cannot form a valid pair. Only the first unit is consumed.
	 */
	if (pos >= str_i_max_len || (code & SURROGATE_LOW)) {
		code = UNICODE_MAX + 1;
		goto done;
	}

	WARN_ON_ONCE(u_ch != 2);
	trail = str_i[pos] << 8;
	trail |= str_i[pos + 1];
	pos += 2;

	if ((trail & SURROGATE_MASK) == SURROGATE_PAIR &&
	    (trail & SURROGATE_LOW))
		code = PLANE_SIZE +
		       ((code & SURROGATE_CHAR_MASK) << SURROGATE_CHAR_BITS) +
		       (trail & SURROGATE_CHAR_MASK);
	else
		code = UNICODE_MAX + 1;	/* both units are consumed */

done:
	*ret = code;
	return pos - str_i_idx;
}
848a0cdef1SJan Kara 
858a0cdef1SJan Kara 
udf_name_conv_char(uint8_t * str_o,int str_o_max_len,int * str_o_idx,const uint8_t * str_i,int str_i_max_len,int * str_i_idx,int u_ch,int * needsCRC,int (* conv_f)(wchar_t,unsigned char *,int),int translate)86484a10f4SAndrew Gabbasov static int udf_name_conv_char(uint8_t *str_o, int str_o_max_len,
87484a10f4SAndrew Gabbasov 			      int *str_o_idx,
88484a10f4SAndrew Gabbasov 			      const uint8_t *str_i, int str_i_max_len,
89484a10f4SAndrew Gabbasov 			      int *str_i_idx,
90484a10f4SAndrew Gabbasov 			      int u_ch, int *needsCRC,
91484a10f4SAndrew Gabbasov 			      int (*conv_f)(wchar_t, unsigned char *, int),
92484a10f4SAndrew Gabbasov 			      int translate)
93484a10f4SAndrew Gabbasov {
948a0cdef1SJan Kara 	unicode_t c;
95484a10f4SAndrew Gabbasov 	int illChar = 0;
96484a10f4SAndrew Gabbasov 	int len, gotch = 0;
97484a10f4SAndrew Gabbasov 
988a0cdef1SJan Kara 	while (!gotch && *str_i_idx < str_i_max_len) {
99484a10f4SAndrew Gabbasov 		if (*str_o_idx >= str_o_max_len) {
100484a10f4SAndrew Gabbasov 			*needsCRC = 1;
101484a10f4SAndrew Gabbasov 			return gotch;
102484a10f4SAndrew Gabbasov 		}
103484a10f4SAndrew Gabbasov 
1048a0cdef1SJan Kara 		len = get_utf16_char(str_i, str_i_max_len, *str_i_idx, u_ch,
1058a0cdef1SJan Kara 				     &c);
1068a0cdef1SJan Kara 		/* These chars cannot be converted. Replace them. */
1078a0cdef1SJan Kara 		if (c == 0 || c > UNICODE_MAX || (conv_f && c > MAX_WCHAR_T) ||
1088a0cdef1SJan Kara 		    (translate && c == '/')) {
109484a10f4SAndrew Gabbasov 			illChar = 1;
1108a0cdef1SJan Kara 			if (!translate)
1118a0cdef1SJan Kara 				gotch = 1;
1128a0cdef1SJan Kara 		} else if (illChar)
113484a10f4SAndrew Gabbasov 			break;
114484a10f4SAndrew Gabbasov 		else
115484a10f4SAndrew Gabbasov 			gotch = 1;
1168a0cdef1SJan Kara 		*str_i_idx += len;
117484a10f4SAndrew Gabbasov 	}
118484a10f4SAndrew Gabbasov 	if (illChar) {
119484a10f4SAndrew Gabbasov 		*needsCRC = 1;
120484a10f4SAndrew Gabbasov 		c = ILLEGAL_CHAR_MARK;
121484a10f4SAndrew Gabbasov 		gotch = 1;
122484a10f4SAndrew Gabbasov 	}
123484a10f4SAndrew Gabbasov 	if (gotch) {
1248a0cdef1SJan Kara 		if (conv_f) {
1258a0cdef1SJan Kara 			len = conv_f(c, &str_o[*str_o_idx],
1268a0cdef1SJan Kara 				     str_o_max_len - *str_o_idx);
1278a0cdef1SJan Kara 		} else {
1288a0cdef1SJan Kara 			len = utf32_to_utf8(c, &str_o[*str_o_idx],
1298a0cdef1SJan Kara 					    str_o_max_len - *str_o_idx);
1308a0cdef1SJan Kara 			if (len < 0)
1318a0cdef1SJan Kara 				len = -ENAMETOOLONG;
1328a0cdef1SJan Kara 		}
133484a10f4SAndrew Gabbasov 		/* Valid character? */
134484a10f4SAndrew Gabbasov 		if (len >= 0)
135484a10f4SAndrew Gabbasov 			*str_o_idx += len;
136484a10f4SAndrew Gabbasov 		else if (len == -ENAMETOOLONG) {
137484a10f4SAndrew Gabbasov 			*needsCRC = 1;
138484a10f4SAndrew Gabbasov 			gotch = 0;
139484a10f4SAndrew Gabbasov 		} else {
1408a0cdef1SJan Kara 			str_o[(*str_o_idx)++] = ILLEGAL_CHAR_MARK;
141484a10f4SAndrew Gabbasov 			*needsCRC = 1;
142484a10f4SAndrew Gabbasov 		}
143484a10f4SAndrew Gabbasov 	}
144484a10f4SAndrew Gabbasov 	return gotch;
145484a10f4SAndrew Gabbasov }
146484a10f4SAndrew Gabbasov 
udf_name_from_CS0(struct super_block * sb,uint8_t * str_o,int str_max_len,const uint8_t * ocu,int ocu_len,int translate)147d504adc2SJan Kara static int udf_name_from_CS0(struct super_block *sb,
148d504adc2SJan Kara 			     uint8_t *str_o, int str_max_len,
1499293fcfbSAndrew Gabbasov 			     const uint8_t *ocu, int ocu_len,
150484a10f4SAndrew Gabbasov 			     int translate)
1511da177e4SLinus Torvalds {
152484a10f4SAndrew Gabbasov 	uint32_t c;
1539293fcfbSAndrew Gabbasov 	uint8_t cmp_id;
154484a10f4SAndrew Gabbasov 	int idx, len;
155484a10f4SAndrew Gabbasov 	int u_ch;
156484a10f4SAndrew Gabbasov 	int needsCRC = 0;
157484a10f4SAndrew Gabbasov 	int ext_i_len, ext_max_len;
158484a10f4SAndrew Gabbasov 	int str_o_len = 0;	/* Length of resulting output */
159484a10f4SAndrew Gabbasov 	int ext_o_len = 0;	/* Extension output length */
160484a10f4SAndrew Gabbasov 	int ext_crc_len = 0;	/* Extension output length if used with CRC */
161484a10f4SAndrew Gabbasov 	int i_ext = -1;		/* Extension position in input buffer */
162484a10f4SAndrew Gabbasov 	int o_crc = 0;		/* Rightmost possible output pos for CRC+ext */
163484a10f4SAndrew Gabbasov 	unsigned short valueCRC;
164484a10f4SAndrew Gabbasov 	uint8_t ext[EXT_SIZE * NLS_MAX_CHARSET_SIZE + 1];
165484a10f4SAndrew Gabbasov 	uint8_t crc[CRC_LEN];
166d504adc2SJan Kara 	int (*conv_f)(wchar_t, unsigned char *, int);
1671da177e4SLinus Torvalds 
1689293fcfbSAndrew Gabbasov 	if (str_max_len <= 0)
1699293fcfbSAndrew Gabbasov 		return 0;
1701da177e4SLinus Torvalds 
171cb00ea35SCyrill Gorcunov 	if (ocu_len == 0) {
1729293fcfbSAndrew Gabbasov 		memset(str_o, 0, str_max_len);
1731da177e4SLinus Torvalds 		return 0;
1741da177e4SLinus Torvalds 	}
1751da177e4SLinus Torvalds 
176b6453334SPali Rohár 	if (UDF_SB(sb)->s_nls_map)
177d504adc2SJan Kara 		conv_f = UDF_SB(sb)->s_nls_map->uni2char;
1788a0cdef1SJan Kara 	else
1798a0cdef1SJan Kara 		conv_f = NULL;
180d504adc2SJan Kara 
1819293fcfbSAndrew Gabbasov 	cmp_id = ocu[0];
18234f953ddSmarcin.slusarz@gmail.com 	if (cmp_id != 8 && cmp_id != 16) {
1839293fcfbSAndrew Gabbasov 		memset(str_o, 0, str_max_len);
184fcbf7637SSteve Magnani 		pr_err("unknown compression code (%u)\n", cmp_id);
185484a10f4SAndrew Gabbasov 		return -EINVAL;
186484a10f4SAndrew Gabbasov 	}
187484a10f4SAndrew Gabbasov 	u_ch = cmp_id >> 3;
188484a10f4SAndrew Gabbasov 
189484a10f4SAndrew Gabbasov 	ocu++;
190484a10f4SAndrew Gabbasov 	ocu_len--;
191484a10f4SAndrew Gabbasov 
192484a10f4SAndrew Gabbasov 	if (ocu_len % u_ch) {
193484a10f4SAndrew Gabbasov 		pr_err("incorrect filename length (%d)\n", ocu_len + 1);
19478fc2e69SFabian Frederick 		return -EINVAL;
1951da177e4SLinus Torvalds 	}
1961da177e4SLinus Torvalds 
197484a10f4SAndrew Gabbasov 	if (translate) {
198484a10f4SAndrew Gabbasov 		/* Look for extension */
199484a10f4SAndrew Gabbasov 		for (idx = ocu_len - u_ch, ext_i_len = 0;
200484a10f4SAndrew Gabbasov 		     (idx >= 0) && (ext_i_len < EXT_SIZE);
201484a10f4SAndrew Gabbasov 		     idx -= u_ch, ext_i_len++) {
202484a10f4SAndrew Gabbasov 			c = ocu[idx];
203484a10f4SAndrew Gabbasov 			if (u_ch > 1)
204484a10f4SAndrew Gabbasov 				c = (c << 8) | ocu[idx + 1];
2051da177e4SLinus Torvalds 
206484a10f4SAndrew Gabbasov 			if (c == EXT_MARK) {
207484a10f4SAndrew Gabbasov 				if (ext_i_len)
208484a10f4SAndrew Gabbasov 					i_ext = idx;
2093e7fc205SAndrew Gabbasov 				break;
210484a10f4SAndrew Gabbasov 			}
211484a10f4SAndrew Gabbasov 		}
212484a10f4SAndrew Gabbasov 		if (i_ext >= 0) {
213484a10f4SAndrew Gabbasov 			/* Convert extension */
214484a10f4SAndrew Gabbasov 			ext_max_len = min_t(int, sizeof(ext), str_max_len);
215484a10f4SAndrew Gabbasov 			ext[ext_o_len++] = EXT_MARK;
216484a10f4SAndrew Gabbasov 			idx = i_ext + u_ch;
217484a10f4SAndrew Gabbasov 			while (udf_name_conv_char(ext, ext_max_len, &ext_o_len,
218484a10f4SAndrew Gabbasov 						  ocu, ocu_len, &idx,
219484a10f4SAndrew Gabbasov 						  u_ch, &needsCRC,
220484a10f4SAndrew Gabbasov 						  conv_f, translate)) {
221484a10f4SAndrew Gabbasov 				if ((ext_o_len + CRC_LEN) < str_max_len)
222484a10f4SAndrew Gabbasov 					ext_crc_len = ext_o_len;
223484a10f4SAndrew Gabbasov 			}
224484a10f4SAndrew Gabbasov 		}
225484a10f4SAndrew Gabbasov 	}
226484a10f4SAndrew Gabbasov 
227484a10f4SAndrew Gabbasov 	idx = 0;
228484a10f4SAndrew Gabbasov 	while (1) {
229484a10f4SAndrew Gabbasov 		if (translate && (idx == i_ext)) {
230484a10f4SAndrew Gabbasov 			if (str_o_len > (str_max_len - ext_o_len))
231484a10f4SAndrew Gabbasov 				needsCRC = 1;
232484a10f4SAndrew Gabbasov 			break;
233484a10f4SAndrew Gabbasov 		}
234484a10f4SAndrew Gabbasov 
235484a10f4SAndrew Gabbasov 		if (!udf_name_conv_char(str_o, str_max_len, &str_o_len,
236484a10f4SAndrew Gabbasov 					ocu, ocu_len, &idx,
237484a10f4SAndrew Gabbasov 					u_ch, &needsCRC, conv_f, translate))
238484a10f4SAndrew Gabbasov 			break;
239484a10f4SAndrew Gabbasov 
240484a10f4SAndrew Gabbasov 		if (translate &&
241484a10f4SAndrew Gabbasov 		    (str_o_len <= (str_max_len - ext_o_len - CRC_LEN)))
242484a10f4SAndrew Gabbasov 			o_crc = str_o_len;
243484a10f4SAndrew Gabbasov 	}
244484a10f4SAndrew Gabbasov 
245484a10f4SAndrew Gabbasov 	if (translate) {
246*028f6055SJan Kara 		if (str_o_len > 0 && str_o_len <= 2 && str_o[0] == '.' &&
247484a10f4SAndrew Gabbasov 		    (str_o_len == 1 || str_o[1] == '.'))
248484a10f4SAndrew Gabbasov 			needsCRC = 1;
249484a10f4SAndrew Gabbasov 		if (needsCRC) {
250484a10f4SAndrew Gabbasov 			str_o_len = o_crc;
251484a10f4SAndrew Gabbasov 			valueCRC = crc_itu_t(0, ocu, ocu_len);
252484a10f4SAndrew Gabbasov 			crc[0] = CRC_MARK;
253484a10f4SAndrew Gabbasov 			crc[1] = hex_asc_upper_hi(valueCRC >> 8);
254484a10f4SAndrew Gabbasov 			crc[2] = hex_asc_upper_lo(valueCRC >> 8);
255484a10f4SAndrew Gabbasov 			crc[3] = hex_asc_upper_hi(valueCRC);
256484a10f4SAndrew Gabbasov 			crc[4] = hex_asc_upper_lo(valueCRC);
257484a10f4SAndrew Gabbasov 			len = min_t(int, CRC_LEN, str_max_len - str_o_len);
258484a10f4SAndrew Gabbasov 			memcpy(&str_o[str_o_len], crc, len);
259484a10f4SAndrew Gabbasov 			str_o_len += len;
260484a10f4SAndrew Gabbasov 			ext_o_len = ext_crc_len;
261484a10f4SAndrew Gabbasov 		}
262484a10f4SAndrew Gabbasov 		if (ext_o_len > 0) {
263484a10f4SAndrew Gabbasov 			memcpy(&str_o[str_o_len], ext, ext_o_len);
264484a10f4SAndrew Gabbasov 			str_o_len += ext_o_len;
265484a10f4SAndrew Gabbasov 		}
2661da177e4SLinus Torvalds 	}
2671da177e4SLinus Torvalds 
2689293fcfbSAndrew Gabbasov 	return str_o_len;
2699293fcfbSAndrew Gabbasov }
2709293fcfbSAndrew Gabbasov 
udf_name_to_CS0(struct super_block * sb,uint8_t * ocu,int ocu_max_len,const uint8_t * str_i,int str_len)271d504adc2SJan Kara static int udf_name_to_CS0(struct super_block *sb,
272d504adc2SJan Kara 			   uint8_t *ocu, int ocu_max_len,
273d504adc2SJan Kara 			   const uint8_t *str_i, int str_len)
2741da177e4SLinus Torvalds {
2753e7fc205SAndrew Gabbasov 	int i, len;
2763e7fc205SAndrew Gabbasov 	unsigned int max_val;
277bb00c898SAndrew Gabbasov 	int u_len, u_ch;
278ef2e18f1SJan Kara 	unicode_t uni_char;
279d504adc2SJan Kara 	int (*conv_f)(const unsigned char *, int, wchar_t *);
2801da177e4SLinus Torvalds 
2819293fcfbSAndrew Gabbasov 	if (ocu_max_len <= 0)
2829293fcfbSAndrew Gabbasov 		return 0;
2839293fcfbSAndrew Gabbasov 
284b6453334SPali Rohár 	if (UDF_SB(sb)->s_nls_map)
285d504adc2SJan Kara 		conv_f = UDF_SB(sb)->s_nls_map->char2uni;
286ef2e18f1SJan Kara 	else
287ef2e18f1SJan Kara 		conv_f = NULL;
288d504adc2SJan Kara 
2899293fcfbSAndrew Gabbasov 	memset(ocu, 0, ocu_max_len);
2901da177e4SLinus Torvalds 	ocu[0] = 8;
2913e7fc205SAndrew Gabbasov 	max_val = 0xff;
292bb00c898SAndrew Gabbasov 	u_ch = 1;
2931da177e4SLinus Torvalds 
2941da177e4SLinus Torvalds try_again:
2959293fcfbSAndrew Gabbasov 	u_len = 1;
296ef2e18f1SJan Kara 	for (i = 0; i < str_len; i += len) {
297bb00c898SAndrew Gabbasov 		/* Name didn't fit? */
2989293fcfbSAndrew Gabbasov 		if (u_len + u_ch > ocu_max_len)
299bb00c898SAndrew Gabbasov 			return 0;
300ef2e18f1SJan Kara 		if (conv_f) {
301ef2e18f1SJan Kara 			wchar_t wchar;
302ef2e18f1SJan Kara 
303ef2e18f1SJan Kara 			len = conv_f(&str_i[i], str_len - i, &wchar);
304ef2e18f1SJan Kara 			if (len > 0)
305ef2e18f1SJan Kara 				uni_char = wchar;
306ef2e18f1SJan Kara 		} else {
307ef2e18f1SJan Kara 			len = utf8_to_utf32(&str_i[i], str_len - i,
308ef2e18f1SJan Kara 					    &uni_char);
309ef2e18f1SJan Kara 		}
31059285c28SJan Kara 		/* Invalid character, deal with it */
311ef2e18f1SJan Kara 		if (len <= 0 || uni_char > UNICODE_MAX) {
31259285c28SJan Kara 			len = 1;
31359285c28SJan Kara 			uni_char = '?';
31459285c28SJan Kara 		}
3151da177e4SLinus Torvalds 
316cb00ea35SCyrill Gorcunov 		if (uni_char > max_val) {
317ef2e18f1SJan Kara 			unicode_t c;
318ef2e18f1SJan Kara 
319ef2e18f1SJan Kara 			if (max_val == 0xff) {
3203e7fc205SAndrew Gabbasov 				max_val = 0xffff;
3213e7fc205SAndrew Gabbasov 				ocu[0] = 0x10;
322bb00c898SAndrew Gabbasov 				u_ch = 2;
3231da177e4SLinus Torvalds 				goto try_again;
3241da177e4SLinus Torvalds 			}
325ef2e18f1SJan Kara 			/*
326ef2e18f1SJan Kara 			 * Use UTF-16 encoding for chars outside we
327ef2e18f1SJan Kara 			 * cannot encode directly.
328ef2e18f1SJan Kara 			 */
329ef2e18f1SJan Kara 			if (u_len + 2 * u_ch > ocu_max_len)
330ef2e18f1SJan Kara 				return 0;
331ef2e18f1SJan Kara 
332ef2e18f1SJan Kara 			uni_char -= PLANE_SIZE;
333ef2e18f1SJan Kara 			c = SURROGATE_PAIR |
334ef2e18f1SJan Kara 			    ((uni_char >> SURROGATE_CHAR_BITS) &
335ef2e18f1SJan Kara 			     SURROGATE_CHAR_MASK);
336ef2e18f1SJan Kara 			ocu[u_len++] = (uint8_t)(c >> 8);
337ef2e18f1SJan Kara 			ocu[u_len++] = (uint8_t)(c & 0xff);
338ef2e18f1SJan Kara 			uni_char = SURROGATE_PAIR | SURROGATE_LOW |
339ef2e18f1SJan Kara 					(uni_char & SURROGATE_CHAR_MASK);
340ef2e18f1SJan Kara 		}
3411da177e4SLinus Torvalds 
3423e7fc205SAndrew Gabbasov 		if (max_val == 0xffff)
3439293fcfbSAndrew Gabbasov 			ocu[u_len++] = (uint8_t)(uni_char >> 8);
3449293fcfbSAndrew Gabbasov 		ocu[u_len++] = (uint8_t)(uni_char & 0xff);
3451da177e4SLinus Torvalds 	}
3461da177e4SLinus Torvalds 
3479293fcfbSAndrew Gabbasov 	return u_len;
3481da177e4SLinus Torvalds }
3491da177e4SLinus Torvalds 
/*
 * Convert CS0 dstring to output charset. Warning: This function may truncate
 * input string if it is too long as it is used for informational strings only
 * and it is better to truncate the string than to refuse mounting a media.
 *
 * @ocu_i is a dstring field of @i_len bytes: byte 0 is the compression ID and
 * the last byte holds the number of bytes in use (compression ID included).
 * The converted string is written to @utf_o, at most @o_len bytes.
 *
 * Returns what udf_name_from_CS0() returns for the (possibly truncated)
 * string: the output length, or a negative error code.
 */
int udf_dstrCS0toChar(struct super_block *sb, uint8_t *utf_o, int o_len,
		      const uint8_t *ocu_i, int i_len)
{
	int s_len = 0;

	if (i_len > 0) {
		s_len = ocu_i[i_len - 1];
		if (s_len >= i_len) {
			pr_warn("incorrect dstring lengths (%d/%d), truncating\n",
				s_len, i_len);
			s_len = i_len - 1;
			/*
			 * 2-byte encoding? The payload after the compression
			 * ID (s_len - 1 bytes) must be a whole number of
			 * 2-byte characters, so drop a trailing odd byte.
			 * Masking with 1 (not 2) is what fixes the parity;
			 * otherwise an odd payload is passed on and rejected,
			 * or a whole valid character is dropped needlessly.
			 */
			if (ocu_i[0] == 16 && s_len > 0)
				s_len -= (s_len - 1) & 1;
		}
	}

	return udf_name_from_CS0(sb, utf_o, o_len, ocu_i, s_len, 0);
}
3743e7fc205SAndrew Gabbasov 
udf_get_filename(struct super_block * sb,const uint8_t * sname,int slen,uint8_t * dname,int dlen)3759293fcfbSAndrew Gabbasov int udf_get_filename(struct super_block *sb, const uint8_t *sname, int slen,
3760e5cc9a4SJan Kara 		     uint8_t *dname, int dlen)
3771da177e4SLinus Torvalds {
3786ce63836SFabian Frederick 	int ret;
3791da177e4SLinus Torvalds 
38031f2566fSFabian Frederick 	if (!slen)
38131f2566fSFabian Frederick 		return -EIO;
38231f2566fSFabian Frederick 
3839293fcfbSAndrew Gabbasov 	if (dlen <= 0)
3849293fcfbSAndrew Gabbasov 		return 0;
3859293fcfbSAndrew Gabbasov 
386d504adc2SJan Kara 	ret = udf_name_from_CS0(sb, dname, dlen, sname, slen, 1);
3876ce63836SFabian Frederick 	/* Zero length filename isn't valid... */
3886ce63836SFabian Frederick 	if (ret == 0)
3896ce63836SFabian Frederick 		ret = -EINVAL;
3905ceb8b55SFabian Frederick 	return ret;
3911da177e4SLinus Torvalds }
3921da177e4SLinus Torvalds 
/*
 * Convert a file name (@sname, @slen bytes) into CS0 form, storing at most
 * @dlen bytes in @dname. Returns the value produced by udf_name_to_CS0().
 */
int udf_put_filename(struct super_block *sb, const uint8_t *sname, int slen,
		     uint8_t *dname, int dlen)
{
	int cs0_len = udf_name_to_CS0(sb, dname, dlen, sname, slen);

	return cs0_len;
}
3981da177e4SLinus Torvalds 
399