//===-- String utils --------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
8dc13a9a7Scgyurgyik 
9dc13a9a7Scgyurgyik #ifndef LIBC_SRC_STRING_STRING_UTILS_H
10dc13a9a7Scgyurgyik #define LIBC_SRC_STRING_STRING_UTILS_H
11dc13a9a7Scgyurgyik 
12f362aea4SSiva Chandra Reddy #include "src/__support/CPP/Bitset.h"
130784e62cSAlfonso Gregory #include "src/__support/common.h"
14*b1183305SAlex Brachet #include "src/string/memory_utils/memcpy_implementations.h"
15*b1183305SAlex Brachet #include "src/string/memory_utils/memset_implementations.h"
16dc13a9a7Scgyurgyik #include <stddef.h> // size_t
17dc13a9a7Scgyurgyik 
18dc13a9a7Scgyurgyik namespace __llvm_libc {
19dc13a9a7Scgyurgyik namespace internal {
20dc13a9a7Scgyurgyik 
// Counts the characters in 'src' that precede its first null terminator.
// The terminator itself is not part of the count, so an empty string
// yields 0.
static inline size_t string_length(const char *src) {
  const char *cursor = src;
  // Walk forward until the terminator; the distance travelled is the length.
  while (*cursor != '\0')
    ++cursor;
  return static_cast<size_t>(cursor - src);
}
29c92d1aa4Scgyurgyik 
// Scans the first 'n' bytes of 'src' for the byte 'ch' and returns a pointer
// to the earliest match, or nullptr when none of those 'n' bytes equals 'ch'.
// The scan is bounded only by 'n'; a null byte inside the range does not end
// it. No byte is read when 'n' is zero.
static inline void *find_first_character(const unsigned char *src,
                                         unsigned char ch, size_t n) {
  for (size_t offset = 0; offset < n; ++offset) {
    if (src[offset] == ch)
      return const_cast<unsigned char *>(src + offset);
  }
  return nullptr;
}
38c92d1aa4Scgyurgyik 
39dc13a9a7Scgyurgyik // Returns the maximum length span that contains only characters not found in
40dc13a9a7Scgyurgyik // 'segment'. If no characters are found, returns the length of 'src'.
complementary_span(const char * src,const char * segment)41dc13a9a7Scgyurgyik static inline size_t complementary_span(const char *src, const char *segment) {
42dc13a9a7Scgyurgyik   const char *initial = src;
43dc13a9a7Scgyurgyik   cpp::Bitset<256> bitset;
44dc13a9a7Scgyurgyik 
45dc13a9a7Scgyurgyik   for (; *segment; ++segment)
46dc13a9a7Scgyurgyik     bitset.set(*segment);
47dc13a9a7Scgyurgyik   for (; *src && !bitset.test(*src); ++src)
48dc13a9a7Scgyurgyik     ;
49dc13a9a7Scgyurgyik   return src - initial;
50dc13a9a7Scgyurgyik }
51dc13a9a7Scgyurgyik 
52bc45bab7Sparallels // Given the similarities between strtok and strtok_r, we can implement both
53bc45bab7Sparallels // using a utility function. On the first call, 'src' is scanned for the
54bc45bab7Sparallels // first character not found in 'delimiter_string'. Once found, it scans until
55bc45bab7Sparallels // the first character in the 'delimiter_string' or the null terminator is
56bc45bab7Sparallels // found. We define this span as a token. The end of the token is appended with
57bc45bab7Sparallels // a null terminator, and the token is returned. The point where the last token
58bc45bab7Sparallels // is found is then stored within 'context' for subsequent calls. Subsequent
59bc45bab7Sparallels // calls will use 'context' when a nullptr is passed in for 'src'. Once the null
60bc45bab7Sparallels // terminating character is reached, returns a nullptr.
string_token(char * __restrict src,const char * __restrict delimiter_string,char ** __restrict saveptr)6179ce64eaScgyurgyik static inline char *string_token(char *__restrict src,
6279ce64eaScgyurgyik                                  const char *__restrict delimiter_string,
6379ce64eaScgyurgyik                                  char **__restrict saveptr) {
640784e62cSAlfonso Gregory   // Return nullptr immediately if both src AND saveptr are nullptr
650784e62cSAlfonso Gregory   if (unlikely(src == nullptr && ((src = *saveptr) == nullptr)))
660784e62cSAlfonso Gregory     return nullptr;
670784e62cSAlfonso Gregory 
68bc45bab7Sparallels   cpp::Bitset<256> delimiter_set;
690784e62cSAlfonso Gregory   for (; *delimiter_string != '\0'; ++delimiter_string)
70bc45bab7Sparallels     delimiter_set.set(*delimiter_string);
71bc45bab7Sparallels 
720784e62cSAlfonso Gregory   for (; *src != '\0' && delimiter_set.test(*src); ++src)
73bc45bab7Sparallels     ;
740784e62cSAlfonso Gregory   if (*src == '\0') {
75bc45bab7Sparallels     *saveptr = src;
76bc45bab7Sparallels     return nullptr;
77bc45bab7Sparallels   }
78bc45bab7Sparallels   char *token = src;
790784e62cSAlfonso Gregory   for (; *src != '\0'; ++src) {
800784e62cSAlfonso Gregory     if (delimiter_set.test(*src)) {
81bc45bab7Sparallels       *src = '\0';
82bc45bab7Sparallels       ++src;
830784e62cSAlfonso Gregory       break;
840784e62cSAlfonso Gregory     }
85bc45bab7Sparallels   }
86bc45bab7Sparallels   *saveptr = src;
87bc45bab7Sparallels   return token;
88bc45bab7Sparallels }
89bc45bab7Sparallels 
strlcpy(char * __restrict dst,const char * __restrict src,size_t size)90*b1183305SAlex Brachet static inline size_t strlcpy(char *__restrict dst, const char *__restrict src,
91*b1183305SAlex Brachet                              size_t size) {
92*b1183305SAlex Brachet   size_t len = internal::string_length(src);
93*b1183305SAlex Brachet   if (!size)
94*b1183305SAlex Brachet     return len;
95*b1183305SAlex Brachet   size_t n = len < size - 1 ? len : size - 1;
96*b1183305SAlex Brachet   inline_memcpy(dst, src, n);
97*b1183305SAlex Brachet   inline_memset(dst + n, 0, size - n);
98*b1183305SAlex Brachet   return len;
99*b1183305SAlex Brachet }
100*b1183305SAlex Brachet 
101dc13a9a7Scgyurgyik } // namespace internal
102dc13a9a7Scgyurgyik } // namespace __llvm_libc
103dc13a9a7Scgyurgyik 
104dc13a9a7Scgyurgyik #endif //  LIBC_SRC_STRING_STRING_UTILS_H
105