1 //===-- String utils --------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LIBC_SRC_STRING_STRING_UTILS_H
10 #define LIBC_SRC_STRING_STRING_UTILS_H
11 
12 #include "src/__support/CPP/Bitset.h"
13 #include "src/__support/common.h"
14 #include <stddef.h> // size_t
15 
16 namespace __llvm_libc {
17 namespace internal {
18 
// Counts the characters of 'src' that precede its null terminator. The
// terminator itself is not included in the count.
static inline size_t string_length(const char *src) {
  size_t count = 0;
  while (src[count] != '\0')
    ++count;
  return count;
}
27 
// Scans at most the first 'n' bytes of 'src' for 'ch'. Returns a pointer to
// the first matching byte, or nullptr if none of those 'n' bytes equals 'ch'.
// No byte past the match (or past 'n') is read.
static inline void *find_first_character(const unsigned char *src,
                                         unsigned char ch, size_t n) {
  for (size_t offset = 0; offset < n; ++offset) {
    if (src[offset] == ch)
      return const_cast<unsigned char *>(src + offset);
  }
  return nullptr;
}
36 
37 // Returns the maximum length span that contains only characters not found in
38 // 'segment'. If no characters are found, returns the length of 'src'.
39 static inline size_t complementary_span(const char *src, const char *segment) {
40   const char *initial = src;
41   cpp::Bitset<256> bitset;
42 
43   for (; *segment; ++segment)
44     bitset.set(*segment);
45   for (; *src && !bitset.test(*src); ++src)
46     ;
47   return src - initial;
48 }
49 
50 // Given the similarities between strtok and strtok_r, we can implement both
51 // using a utility function. On the first call, 'src' is scanned for the
52 // first character not found in 'delimiter_string'. Once found, it scans until
53 // the first character in the 'delimiter_string' or the null terminator is
54 // found. We define this span as a token. The end of the token is appended with
55 // a null terminator, and the token is returned. The point where the last token
56 // is found is then stored within 'context' for subsequent calls. Subsequent
57 // calls will use 'context' when a nullptr is passed in for 'src'. Once the null
58 // terminating character is reached, returns a nullptr.
59 static inline char *string_token(char *__restrict src,
60                                  const char *__restrict delimiter_string,
61                                  char **__restrict saveptr) {
62   // Return nullptr immediately if both src AND saveptr are nullptr
63   if (unlikely(src == nullptr && ((src = *saveptr) == nullptr)))
64     return nullptr;
65 
66   cpp::Bitset<256> delimiter_set;
67   for (; *delimiter_string != '\0'; ++delimiter_string)
68     delimiter_set.set(*delimiter_string);
69 
70   for (; *src != '\0' && delimiter_set.test(*src); ++src)
71     ;
72   if (*src == '\0') {
73     *saveptr = src;
74     return nullptr;
75   }
76   char *token = src;
77   for (; *src != '\0'; ++src) {
78     if (delimiter_set.test(*src)) {
79       *src = '\0';
80       ++src;
81       break;
82     }
83   }
84   *saveptr = src;
85   return token;
86 }
87 
88 } // namespace internal
89 } // namespace __llvm_libc
90 
91 #endif //  LIBC_SRC_STRING_STRING_UTILS_H
92