1 //===-- String utils --------------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LIBC_SRC_STRING_STRING_UTILS_H 10 #define LIBC_SRC_STRING_STRING_UTILS_H 11 12 #include "src/__support/CPP/Bitset.h" 13 #include "src/__support/common.h" 14 #include <stddef.h> // size_t 15 16 namespace __llvm_libc { 17 namespace internal { 18 19 // Returns the length of a string, denoted by the first occurrence 20 // of a null terminator. 21 static inline size_t string_length(const char *src) { 22 size_t length; 23 for (length = 0; *src; ++src, ++length) 24 ; 25 return length; 26 } 27 28 // Returns the first occurrence of 'ch' within the first 'n' characters of 29 // 'src'. If 'ch' is not found, returns nullptr. 30 static inline void *find_first_character(const unsigned char *src, 31 unsigned char ch, size_t n) { 32 for (; n && *src != ch; --n, ++src) 33 ; 34 return n ? const_cast<unsigned char *>(src) : nullptr; 35 } 36 37 // Returns the maximum length span that contains only characters not found in 38 // 'segment'. If no characters are found, returns the length of 'src'. 39 static inline size_t complementary_span(const char *src, const char *segment) { 40 const char *initial = src; 41 cpp::Bitset<256> bitset; 42 43 for (; *segment; ++segment) 44 bitset.set(*segment); 45 for (; *src && !bitset.test(*src); ++src) 46 ; 47 return src - initial; 48 } 49 50 // Given the similarities between strtok and strtok_r, we can implement both 51 // using a utility function. On the first call, 'src' is scanned for the 52 // first character not found in 'delimiter_string'. Once found, it scans until 53 // the first character in the 'delimiter_string' or the null terminator is 54 // found. We define this span as a token. The end of the token is appended with 55 // a null terminator, and the token is returned. The point where the last token 56 // is found is then stored within 'context' for subsequent calls. Subsequent 57 // calls will use 'context' when a nullptr is passed in for 'src'. Once the null 58 // terminating character is reached, returns a nullptr. 59 static inline char *string_token(char *__restrict src, 60 const char *__restrict delimiter_string, 61 char **__restrict saveptr) { 62 // Return nullptr immediately if both src AND saveptr are nullptr 63 if (unlikely(src == nullptr && ((src = *saveptr) == nullptr))) 64 return nullptr; 65 66 cpp::Bitset<256> delimiter_set; 67 for (; *delimiter_string != '\0'; ++delimiter_string) 68 delimiter_set.set(*delimiter_string); 69 70 for (; *src != '\0' && delimiter_set.test(*src); ++src) 71 ; 72 if (*src == '\0') { 73 *saveptr = src; 74 return nullptr; 75 } 76 char *token = src; 77 for (; *src != '\0'; ++src) { 78 if (delimiter_set.test(*src)) { 79 *src = '\0'; 80 ++src; 81 break; 82 } 83 } 84 *saveptr = src; 85 return token; 86 } 87 88 } // namespace internal 89 } // namespace __llvm_libc 90 91 #endif // LIBC_SRC_STRING_STRING_UTILS_H 92