xref: /openbmc/linux/fs/nls/nls_ucs2_utils.h (revision f3a9b375)
1089f7f59SDr. David Alan Gilbert /* SPDX-License-Identifier: GPL-2.0-or-later */
2089f7f59SDr. David Alan Gilbert /*
3089f7f59SDr. David Alan Gilbert  * Some of the source code in this file came from fs/cifs/cifs_unicode.c
4089f7f59SDr. David Alan Gilbert  * and then via server/unicode.c
5089f7f59SDr. David Alan Gilbert  * cifs_unicode:  Unicode kernel case support
6089f7f59SDr. David Alan Gilbert  *
7089f7f59SDr. David Alan Gilbert  * Function:
8089f7f59SDr. David Alan Gilbert  *     Convert a unicode character to upper or lower case using
9089f7f59SDr. David Alan Gilbert  *     compressed tables.
10089f7f59SDr. David Alan Gilbert  *
11089f7f59SDr. David Alan Gilbert  *   Copyright (c) International Business Machines  Corp., 2000,2009
12089f7f59SDr. David Alan Gilbert  *
13089f7f59SDr. David Alan Gilbert  *
14089f7f59SDr. David Alan Gilbert  * Notes:
15089f7f59SDr. David Alan Gilbert  *     These APIs are based on the C library functions.  The semantics
16089f7f59SDr. David Alan Gilbert  *     should match the C functions but with expanded size operands.
17089f7f59SDr. David Alan Gilbert  *
18089f7f59SDr. David Alan Gilbert  *     The upper/lower functions are based on a table created by mkupr.
19089f7f59SDr. David Alan Gilbert  *     This is a compressed table of upper and lower case conversion.
20089f7f59SDr. David Alan Gilbert  *
21089f7f59SDr. David Alan Gilbert  */
22089f7f59SDr. David Alan Gilbert #ifndef _NLS_UCS2_UTILS_H
23089f7f59SDr. David Alan Gilbert #define _NLS_UCS2_UTILS_H
24089f7f59SDr. David Alan Gilbert 
25089f7f59SDr. David Alan Gilbert #include <asm/byteorder.h>
26089f7f59SDr. David Alan Gilbert #include <linux/types.h>
27089f7f59SDr. David Alan Gilbert #include <linux/nls.h>
28089f7f59SDr. David Alan Gilbert #include <linux/unicode.h>
29*f3a9b375SDr. David Alan Gilbert #include "nls_ucs2_data.h"
30089f7f59SDr. David Alan Gilbert 
31089f7f59SDr. David Alan Gilbert /*
 * Windows maps these to the user defined 16 bit Unicode range since they are
 * reserved symbols (along with \ and /), otherwise illegal to store
 * in filenames in NTFS.
 *
 * Each macro below is the ASCII code of the reserved character plus 0xF000,
 * truncated to a __u16.  Note that UNI_SLASH is built from the backslash
 * character ('\\'), not from the forward slash, despite its name.
 */
36089f7f59SDr. David Alan Gilbert #define UNI_ASTERISK    ((__u16)('*' + 0xF000))
37089f7f59SDr. David Alan Gilbert #define UNI_QUESTION    ((__u16)('?' + 0xF000))
38089f7f59SDr. David Alan Gilbert #define UNI_COLON       ((__u16)(':' + 0xF000))
39089f7f59SDr. David Alan Gilbert #define UNI_GRTRTHAN    ((__u16)('>' + 0xF000))
40089f7f59SDr. David Alan Gilbert #define UNI_LESSTHAN    ((__u16)('<' + 0xF000))
41089f7f59SDr. David Alan Gilbert #define UNI_PIPE        ((__u16)('|' + 0xF000))
42089f7f59SDr. David Alan Gilbert #define UNI_SLASH       ((__u16)('\\' + 0xF000))
43089f7f59SDr. David Alan Gilbert 
/*
 * UniStrcat:  Append the string ucs2 to the end of the string ucs1
 *
 * The null terminator of ucs1 is overwritten by the first character of
 * ucs2, and the null terminator of ucs2 is copied along with the rest.
 * The function takes no length argument, so the caller must make sure
 * the destination is large enough.
 *
 * Returns:
 *     ucs1 (the start of the destination string)
 */
static inline wchar_t *UniStrcat(wchar_t *ucs1, const wchar_t *ucs2)
{
	wchar_t *dst = ucs1;

	while (*dst)		/* locate the terminator of string 1 */
		dst++;

	do {			/* copy string 2, terminator included */
		*dst = *ucs2++;
	} while (*dst++);

	return ucs1;
}
61089f7f59SDr. David Alan Gilbert 
/*
 * UniStrchr:  Locate the first occurrence of a character in a string
 *
 * Searching for the null character (uc == 0) yields the address of the
 * string's terminator.
 *
 * Returns:
 *     Address of the first element of ucs equal to uc,
 *     or NULL if the terminator is reached without finding uc
 */
static inline wchar_t *UniStrchr(const wchar_t *ucs, wchar_t uc)
{
	for (; *ucs != uc; ucs++) {
		if (!*ucs)	/* hit the terminator without a match */
			return NULL;
	}
	return (wchar_t *)ucs;
}
78089f7f59SDr. David Alan Gilbert 
/*
 * UniStrcmp:  Compare two null terminated strings
 *
 * The strings are walked in step until they differ or the first one ends;
 * the result is the difference of the two characters at that position.
 *
 * Returns:
 *     < 0:  First string is less than second
 *     = 0:  Strings are equal
 *     > 0:  First string is greater than second
 */
static inline int UniStrcmp(const wchar_t *ucs1, const wchar_t *ucs2)
{
	for (;;) {
		if (*ucs1 != *ucs2 || !*ucs1)
			break;	/* mismatch, or both strings ended */
		ucs1++;
		ucs2++;
	}
	return (int)*ucs1 - (int)*ucs2;
}
95089f7f59SDr. David Alan Gilbert 
/*
 * UniStrcpy:  Copy the string ucs2, terminator included, into ucs1
 *
 * The function takes no length argument, so the caller must make sure
 * the destination is large enough.
 *
 * Returns:
 *     ucs1 (the start of the destination string)
 */
static inline wchar_t *UniStrcpy(wchar_t *ucs1, const wchar_t *ucs2)
{
	size_t i;

	for (i = 0; ; i++) {
		ucs1[i] = ucs2[i];
		if (!ucs1[i])	/* terminator has just been copied */
			break;
	}
	return ucs1;
}
107089f7f59SDr. David Alan Gilbert 
/*
 * UniStrlen:  Return the length of a string (in 16 bit Unicode chars not bytes)
 *
 * The count excludes the null terminator.  It is accumulated in a size_t,
 * matching the return type, so it cannot overflow a signed int on very
 * long strings.
 */
static inline size_t UniStrlen(const wchar_t *ucs1)
{
	size_t len = 0;

	while (ucs1[len])
		len++;
	return len;
}
119089f7f59SDr. David Alan Gilbert 
/*
 * UniStrnlen:  Return the length (in 16 bit Unicode chars not bytes) of a
 *		string (length limited)
 *
 * At most maxlen characters of ucs1 are examined.  The limit is tested
 * before each character is read, so a maxlen of zero (or a negative
 * maxlen) returns 0 without touching the string at all.
 *
 * Returns:
 *     Number of characters before the null terminator, or maxlen if no
 *     terminator is found within the first maxlen characters
 */
static inline size_t UniStrnlen(const wchar_t *ucs1, int maxlen)
{
	size_t len = 0;

	if (maxlen <= 0)
		return 0;

	while (len < (size_t)maxlen && ucs1[len])
		len++;
	return len;
}
135089f7f59SDr. David Alan Gilbert 
/*
 * UniStrncat:  Concatenate length limited string
 *
 * Appends at most n characters of ucs2 to the end of ucs1 and then always
 * writes a null terminator, so up to n + 1 elements may be stored after
 * the original end of ucs1.
 *
 * Returns:
 *     ucs1 (the start of the destination string)
 */
static inline wchar_t *UniStrncat(wchar_t *ucs1, const wchar_t *ucs2, size_t n)
{
	wchar_t *dst = ucs1;

	while (*dst)		/* locate the terminator of s1 */
		dst++;

	for (; n && *ucs2; n--)	/* copy s2 after s1 */
		*dst++ = *ucs2++;

	*dst = 0;		/* Null terminate the result */
	return ucs1;
}
153089f7f59SDr. David Alan Gilbert 
/*
 * UniStrncmp:  Compare length limited string
 *
 * Compares at most n characters of the two strings.  The result is the
 * difference of the characters at the position where the comparison
 * stopped, which is 0 when the first n characters all matched.
 *
 * Returns:
 *     < 0, 0 or > 0, as the compared part of ucs1 is less than, equal to
 *     or greater than that of ucs2
 */
static inline int UniStrncmp(const wchar_t *ucs1, const wchar_t *ucs2, size_t n)
{
	if (n == 0)
		return 0;	/* Null strings are equal */

	for (;;) {
		/* stop on mismatch, end of string, or exhausted count */
		if (*ucs1 != *ucs2 || !*ucs1 || !--n)
			break;
		ucs1++;
		ucs2++;
	}
	return (int)*ucs1 - (int)*ucs2;
}
167089f7f59SDr. David Alan Gilbert 
/*
 * UniStrncmp_le:  Compare length limited string - native to little-endian
 *
 * Same walk as a plain length limited compare, except that every
 * character read from ucs2 is passed through __le16_to_cpu() before it is
 * compared with the corresponding character of ucs1, which is used as is.
 *
 * Returns:
 *     Difference of the characters at the position where the comparison
 *     stopped; 0 when n is 0 or the first n characters all matched
 */
static inline int
UniStrncmp_le(const wchar_t *ucs1, const wchar_t *ucs2, size_t n)
{
	if (n == 0)
		return 0;	/* Null strings are equal */

	for (;;) {
		/* stop on mismatch, end of string, or exhausted count */
		if (*ucs1 != __le16_to_cpu(*ucs2) || !*ucs1 || !--n)
			break;
		ucs1++;
		ucs2++;
	}
	return (int)*ucs1 - (int)__le16_to_cpu(*ucs2);
}
182089f7f59SDr. David Alan Gilbert 
/*
 * UniStrncpy:  Copy length limited string with pad
 *
 * Exactly n elements of ucs1 are written: the characters of ucs2 up to
 * its terminator (at most n of them), followed by nulls for the rest.
 * If ucs2 holds n or more characters no terminator is stored.
 *
 * Returns:
 *     ucs1 (the start of the destination buffer)
 */
static inline wchar_t *UniStrncpy(wchar_t *ucs1, const wchar_t *ucs2, size_t n)
{
	size_t i = 0;

	for (; i < n && ucs2[i]; i++)	/* Copy the strings */
		ucs1[i] = ucs2[i];

	for (; i < n; i++)		/* Pad with nulls */
		ucs1[i] = 0;

	return ucs1;
}
198089f7f59SDr. David Alan Gilbert 
/*
 * UniStrncpy_le:  Copy length limited string with pad to little-endian
 *
 * Exactly n elements of ucs1 are written: the characters of ucs2 up to
 * its terminator (at most n of them), each passed through __le16_to_cpu()
 * on the way, followed by nulls for the rest.  If ucs2 holds n or more
 * characters no terminator is stored.
 *
 * Returns:
 *     ucs1 (the start of the destination buffer)
 */
static inline wchar_t *UniStrncpy_le(wchar_t *ucs1, const wchar_t *ucs2, size_t n)
{
	size_t i = 0;

	for (; i < n && ucs2[i]; i++)	/* Copy the strings */
		ucs1[i] = __le16_to_cpu(ucs2[i]);

	for (; i < n; i++)		/* Pad with nulls */
		ucs1[i] = 0;

	return ucs1;
}
214089f7f59SDr. David Alan Gilbert 
/*
 * UniStrstr:  Find a string in a string
 *
 * Tries every starting position of ucs1 in turn, comparing ucs2 against
 * the characters from that position on.  An empty ucs2 matches at the
 * very start of ucs1.
 *
 * Returns:
 *     Address of first match found
 *     NULL if no matching string is found
 */
static inline wchar_t *UniStrstr(const wchar_t *ucs1, const wchar_t *ucs2)
{
	const wchar_t *start;

	for (start = ucs1; ; start++) {
		const wchar_t *hay = start;
		const wchar_t *needle = ucs2;

		/* Advance over the matching prefix at this position */
		while (*hay && *hay == *needle) {
			hay++;
			needle++;
		}

		if (!*needle)	/* all of ucs2 consumed: match found */
			return (wchar_t *)start;
		if (!*hay)	/* ucs1 ran out first: no match */
			return NULL;
	}
}
244089f7f59SDr. David Alan Gilbert 
245089f7f59SDr. David Alan Gilbert #ifndef UNIUPR_NOUPPER
246089f7f59SDr. David Alan Gilbert /*
247089f7f59SDr. David Alan Gilbert  * UniToupper:  Convert a unicode character to upper case
248089f7f59SDr. David Alan Gilbert  */
UniToupper(register wchar_t uc)249089f7f59SDr. David Alan Gilbert static inline wchar_t UniToupper(register wchar_t uc)
250089f7f59SDr. David Alan Gilbert {
251089f7f59SDr. David Alan Gilbert 	register const struct UniCaseRange *rp;
252089f7f59SDr. David Alan Gilbert 
253089f7f59SDr. David Alan Gilbert 	if (uc < sizeof(NlsUniUpperTable)) {
254089f7f59SDr. David Alan Gilbert 		/* Latin characters */
255089f7f59SDr. David Alan Gilbert 		return uc + NlsUniUpperTable[uc];	/* Use base tables */
256089f7f59SDr. David Alan Gilbert 	}
257089f7f59SDr. David Alan Gilbert 
258089f7f59SDr. David Alan Gilbert 	rp = NlsUniUpperRange;	/* Use range tables */
259089f7f59SDr. David Alan Gilbert 	while (rp->start) {
260089f7f59SDr. David Alan Gilbert 		if (uc < rp->start)	/* Before start of range */
261089f7f59SDr. David Alan Gilbert 			return uc;	/* Uppercase = input */
262089f7f59SDr. David Alan Gilbert 		if (uc <= rp->end)	/* In range */
263089f7f59SDr. David Alan Gilbert 			return uc + rp->table[uc - rp->start];
264089f7f59SDr. David Alan Gilbert 		rp++;	/* Try next range */
265089f7f59SDr. David Alan Gilbert 	}
266089f7f59SDr. David Alan Gilbert 	return uc;		/* Past last range */
267089f7f59SDr. David Alan Gilbert }
268089f7f59SDr. David Alan Gilbert 
269089f7f59SDr. David Alan Gilbert /*
270089f7f59SDr. David Alan Gilbert  * UniStrupr:  Upper case a unicode string
271089f7f59SDr. David Alan Gilbert  */
UniStrupr(register __le16 * upin)272089f7f59SDr. David Alan Gilbert static inline __le16 *UniStrupr(register __le16 *upin)
273089f7f59SDr. David Alan Gilbert {
274089f7f59SDr. David Alan Gilbert 	register __le16 *up;
275089f7f59SDr. David Alan Gilbert 
276089f7f59SDr. David Alan Gilbert 	up = upin;
277089f7f59SDr. David Alan Gilbert 	while (*up) {		/* For all characters */
278089f7f59SDr. David Alan Gilbert 		*up = cpu_to_le16(UniToupper(le16_to_cpu(*up)));
279089f7f59SDr. David Alan Gilbert 		up++;
280089f7f59SDr. David Alan Gilbert 	}
281089f7f59SDr. David Alan Gilbert 	return upin;		/* Return input pointer */
282089f7f59SDr. David Alan Gilbert }
283089f7f59SDr. David Alan Gilbert #endif				/* UNIUPR_NOUPPER */
284089f7f59SDr. David Alan Gilbert 
285089f7f59SDr. David Alan Gilbert #endif /* _NLS_UCS2_UTILS_H */
286