xref: /linux-6.15/lib/ucs2_string.c (revision 91640531)
1b2441318SGreg Kroah-Hartman // SPDX-License-Identifier: GPL-2.0
20635eb8aSMatthew Garrett #include <linux/ucs2_string.h>
30635eb8aSMatthew Garrett #include <linux/module.h>
40635eb8aSMatthew Garrett 
50635eb8aSMatthew Garrett /* Return the number of unicode characters in data */
60635eb8aSMatthew Garrett unsigned long
ucs2_strnlen(const ucs2_char_t * s,size_t maxlength)70635eb8aSMatthew Garrett ucs2_strnlen(const ucs2_char_t *s, size_t maxlength)
80635eb8aSMatthew Garrett {
90635eb8aSMatthew Garrett         unsigned long length = 0;
100635eb8aSMatthew Garrett 
110635eb8aSMatthew Garrett         while (*s++ != 0 && length < maxlength)
120635eb8aSMatthew Garrett                 length++;
130635eb8aSMatthew Garrett         return length;
140635eb8aSMatthew Garrett }
150635eb8aSMatthew Garrett EXPORT_SYMBOL(ucs2_strnlen);
160635eb8aSMatthew Garrett 
170635eb8aSMatthew Garrett unsigned long
ucs2_strlen(const ucs2_char_t * s)180635eb8aSMatthew Garrett ucs2_strlen(const ucs2_char_t *s)
190635eb8aSMatthew Garrett {
200635eb8aSMatthew Garrett         return ucs2_strnlen(s, ~0UL);
210635eb8aSMatthew Garrett }
220635eb8aSMatthew Garrett EXPORT_SYMBOL(ucs2_strlen);
230635eb8aSMatthew Garrett 
240635eb8aSMatthew Garrett /*
250635eb8aSMatthew Garrett  * Return the number of bytes is the length of this string
260635eb8aSMatthew Garrett  * Note: this is NOT the same as the number of unicode characters
270635eb8aSMatthew Garrett  */
280635eb8aSMatthew Garrett unsigned long
ucs2_strsize(const ucs2_char_t * data,unsigned long maxlength)290635eb8aSMatthew Garrett ucs2_strsize(const ucs2_char_t *data, unsigned long maxlength)
300635eb8aSMatthew Garrett {
310635eb8aSMatthew Garrett         return ucs2_strnlen(data, maxlength/sizeof(ucs2_char_t)) * sizeof(ucs2_char_t);
320635eb8aSMatthew Garrett }
330635eb8aSMatthew Garrett EXPORT_SYMBOL(ucs2_strsize);
340635eb8aSMatthew Garrett 
35e4c89f93SMaximilian Luz /**
36e4c89f93SMaximilian Luz  * ucs2_strscpy() - Copy a UCS2 string into a sized buffer.
37e4c89f93SMaximilian Luz  *
38e4c89f93SMaximilian Luz  * @dst: Pointer to the destination buffer where to copy the string to.
39e4c89f93SMaximilian Luz  * @src: Pointer to the source buffer where to copy the string from.
40e4c89f93SMaximilian Luz  * @count: Size of the destination buffer, in UCS2 (16-bit) characters.
41e4c89f93SMaximilian Luz  *
42e4c89f93SMaximilian Luz  * Like strscpy(), only for UCS2 strings.
43e4c89f93SMaximilian Luz  *
44e4c89f93SMaximilian Luz  * Copy the source string @src, or as much of it as fits, into the destination
45e4c89f93SMaximilian Luz  * buffer @dst. The behavior is undefined if the string buffers overlap. The
46e4c89f93SMaximilian Luz  * destination buffer @dst is always NUL-terminated, unless it's zero-sized.
47e4c89f93SMaximilian Luz  *
48e4c89f93SMaximilian Luz  * Return: The number of characters copied into @dst (excluding the trailing
49e4c89f93SMaximilian Luz  * %NUL terminator) or -E2BIG if @count is 0 or @src was truncated due to the
50e4c89f93SMaximilian Luz  * destination buffer being too small.
51e4c89f93SMaximilian Luz  */
ucs2_strscpy(ucs2_char_t * dst,const ucs2_char_t * src,size_t count)52e4c89f93SMaximilian Luz ssize_t ucs2_strscpy(ucs2_char_t *dst, const ucs2_char_t *src, size_t count)
53e4c89f93SMaximilian Luz {
54e4c89f93SMaximilian Luz 	long res;
55e4c89f93SMaximilian Luz 
56e4c89f93SMaximilian Luz 	/*
57e4c89f93SMaximilian Luz 	 * Ensure that we have a valid amount of space. We need to store at
58e4c89f93SMaximilian Luz 	 * least one NUL-character.
59e4c89f93SMaximilian Luz 	 */
60e4c89f93SMaximilian Luz 	if (count == 0 || WARN_ON_ONCE(count > INT_MAX / sizeof(*dst)))
61e4c89f93SMaximilian Luz 		return -E2BIG;
62e4c89f93SMaximilian Luz 
63e4c89f93SMaximilian Luz 	/*
64e4c89f93SMaximilian Luz 	 * Copy at most 'count' characters, return early if we find a
65e4c89f93SMaximilian Luz 	 * NUL-terminator.
66e4c89f93SMaximilian Luz 	 */
67e4c89f93SMaximilian Luz 	for (res = 0; res < count; res++) {
68e4c89f93SMaximilian Luz 		ucs2_char_t c;
69e4c89f93SMaximilian Luz 
70e4c89f93SMaximilian Luz 		c = src[res];
71e4c89f93SMaximilian Luz 		dst[res] = c;
72e4c89f93SMaximilian Luz 
73e4c89f93SMaximilian Luz 		if (!c)
74e4c89f93SMaximilian Luz 			return res;
75e4c89f93SMaximilian Luz 	}
76e4c89f93SMaximilian Luz 
77e4c89f93SMaximilian Luz 	/*
78e4c89f93SMaximilian Luz 	 * The loop above terminated without finding a NUL-terminator,
79e4c89f93SMaximilian Luz 	 * exceeding the 'count': Enforce proper NUL-termination and return
80e4c89f93SMaximilian Luz 	 * error.
81e4c89f93SMaximilian Luz 	 */
82e4c89f93SMaximilian Luz 	dst[count - 1] = 0;
83e4c89f93SMaximilian Luz 	return -E2BIG;
84e4c89f93SMaximilian Luz }
85e4c89f93SMaximilian Luz EXPORT_SYMBOL(ucs2_strscpy);
86e4c89f93SMaximilian Luz 
870635eb8aSMatthew Garrett int
ucs2_strncmp(const ucs2_char_t * a,const ucs2_char_t * b,size_t len)880635eb8aSMatthew Garrett ucs2_strncmp(const ucs2_char_t *a, const ucs2_char_t *b, size_t len)
890635eb8aSMatthew Garrett {
900635eb8aSMatthew Garrett         while (1) {
910635eb8aSMatthew Garrett                 if (len == 0)
920635eb8aSMatthew Garrett                         return 0;
930635eb8aSMatthew Garrett                 if (*a < *b)
940635eb8aSMatthew Garrett                         return -1;
950635eb8aSMatthew Garrett                 if (*a > *b)
960635eb8aSMatthew Garrett                         return 1;
970635eb8aSMatthew Garrett                 if (*a == 0) /* implies *b == 0 */
980635eb8aSMatthew Garrett                         return 0;
990635eb8aSMatthew Garrett                 a++;
1000635eb8aSMatthew Garrett                 b++;
1010635eb8aSMatthew Garrett                 len--;
1020635eb8aSMatthew Garrett         }
1030635eb8aSMatthew Garrett }
1040635eb8aSMatthew Garrett EXPORT_SYMBOL(ucs2_strncmp);
10573500267SPeter Jones 
10673500267SPeter Jones unsigned long
ucs2_utf8size(const ucs2_char_t * src)10773500267SPeter Jones ucs2_utf8size(const ucs2_char_t *src)
10873500267SPeter Jones {
10973500267SPeter Jones 	unsigned long i;
11073500267SPeter Jones 	unsigned long j = 0;
11173500267SPeter Jones 
112cf289cefSLukas Wunner 	for (i = 0; src[i]; i++) {
11373500267SPeter Jones 		u16 c = src[i];
11473500267SPeter Jones 
115a6807590SJason Andryuk 		if (c >= 0x800)
11673500267SPeter Jones 			j += 3;
117a6807590SJason Andryuk 		else if (c >= 0x80)
11873500267SPeter Jones 			j += 2;
11973500267SPeter Jones 		else
12073500267SPeter Jones 			j += 1;
12173500267SPeter Jones 	}
12273500267SPeter Jones 
12373500267SPeter Jones 	return j;
12473500267SPeter Jones }
12573500267SPeter Jones EXPORT_SYMBOL(ucs2_utf8size);
12673500267SPeter Jones 
12773500267SPeter Jones /*
12873500267SPeter Jones  * copy at most maxlength bytes of whole utf8 characters to dest from the
12973500267SPeter Jones  * ucs2 string src.
13073500267SPeter Jones  *
13173500267SPeter Jones  * The return value is the number of characters copied, not including the
13273500267SPeter Jones  * final NUL character.
13373500267SPeter Jones  */
13473500267SPeter Jones unsigned long
ucs2_as_utf8(u8 * dest,const ucs2_char_t * src,unsigned long maxlength)13573500267SPeter Jones ucs2_as_utf8(u8 *dest, const ucs2_char_t *src, unsigned long maxlength)
13673500267SPeter Jones {
13773500267SPeter Jones 	unsigned int i;
13873500267SPeter Jones 	unsigned long j = 0;
13973500267SPeter Jones 	unsigned long limit = ucs2_strnlen(src, maxlength);
14073500267SPeter Jones 
14173500267SPeter Jones 	for (i = 0; maxlength && i < limit; i++) {
14273500267SPeter Jones 		u16 c = src[i];
14373500267SPeter Jones 
144a6807590SJason Andryuk 		if (c >= 0x800) {
14573500267SPeter Jones 			if (maxlength < 3)
14673500267SPeter Jones 				break;
14773500267SPeter Jones 			maxlength -= 3;
14873500267SPeter Jones 			dest[j++] = 0xe0 | (c & 0xf000) >> 12;
149a6807590SJason Andryuk 			dest[j++] = 0x80 | (c & 0x0fc0) >> 6;
15073500267SPeter Jones 			dest[j++] = 0x80 | (c & 0x003f);
151a6807590SJason Andryuk 		} else if (c >= 0x80) {
15273500267SPeter Jones 			if (maxlength < 2)
15373500267SPeter Jones 				break;
15473500267SPeter Jones 			maxlength -= 2;
155a6807590SJason Andryuk 			dest[j++] = 0xc0 | (c & 0x7c0) >> 6;
156a6807590SJason Andryuk 			dest[j++] = 0x80 | (c & 0x03f);
15773500267SPeter Jones 		} else {
15873500267SPeter Jones 			maxlength -= 1;
15973500267SPeter Jones 			dest[j++] = c & 0x7f;
16073500267SPeter Jones 		}
16173500267SPeter Jones 	}
16273500267SPeter Jones 	if (maxlength)
16373500267SPeter Jones 		dest[j] = '\0';
16473500267SPeter Jones 	return j;
16573500267SPeter Jones }
16673500267SPeter Jones EXPORT_SYMBOL(ucs2_as_utf8);
16709088a40SRandy Dunlap 
168*91640531SArnd Bergmann MODULE_DESCRIPTION("UCS2 string handling");
16909088a40SRandy Dunlap MODULE_LICENSE("GPL v2");
170