1 //===-- String to integer conversion utils ----------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LIBC_SRC_SUPPORT_STR_TO_INTEGER_H
10 #define LIBC_SRC_SUPPORT_STR_TO_INTEGER_H
11 
12 #include "src/__support/CPP/Limits.h"
13 #include "src/__support/ctype_utils.h"
14 #include <errno.h>
15 #include <limits.h>
16 
17 namespace __llvm_libc {
18 namespace internal {
19 
20 // Returns a pointer to the first character in src that is not a whitespace
21 // character (as determined by isspace())
first_non_whitespace(const char * __restrict src)22 static inline const char *first_non_whitespace(const char *__restrict src) {
23   while (internal::isspace(*src)) {
24     ++src;
25   }
26   return src;
27 }
28 
// Maps one character to its value as a digit in bases up to 36: '0'-'9' yield
// 0-9 and letters yield 10 upward ('a'/'A' is 10, 'z'/'Z' is 35), ignoring
// case. Any other character yields 0, so callers that must tell a genuine zero
// digit from a non-digit have to check the character class themselves.
static inline int b36_char_to_int(char input) {
  if (isalpha(input)) {
    // Setting bit 0x20 folds an ASCII uppercase letter onto its lowercase form.
    const int lowered = input | 0x20;
    return lowered - 'a' + 10;
  }
  if (isdigit(input)) {
    return input - '0';
  }
  return 0;
}
36 
37 // checks if the next 3 characters of the string pointer are the start of a
38 // hexadecimal number. Does not advance the string pointer.
is_hex_start(const char * __restrict src)39 static inline bool is_hex_start(const char *__restrict src) {
40   return *src == '0' && (*(src + 1) | 32) == 'x' && isalnum(*(src + 2)) &&
41          b36_char_to_int(*(src + 2)) < 16;
42 }
43 
44 // Takes the address of the string pointer and parses the base from the start of
45 // it. This function will advance |src| to the first valid digit in the inferred
46 // base.
infer_base(const char * __restrict * __restrict src)47 static inline int infer_base(const char *__restrict *__restrict src) {
48   // A hexadecimal number is defined as "the prefix 0x or 0X followed by a
49   // sequence of the deimal digits and the letters a (or A) through f (or F)
50   // with values 10 through 15 respectively." (C standard 6.4.4.1)
51   if (is_hex_start(*src)) {
52     (*src) += 2;
53     return 16;
54   } // An octal number is defined as "the prefix 0 optionally followed by a
55     // sequence of the digits 0 through 7 only" (C standard 6.4.4.1) and so any
56     // number that starts with 0, including just 0, is an octal number.
57   else if (**src == '0') {
58     return 8;
59   } // A decimal number is defined as beginning "with a nonzero digit and
60     // consist[ing] of a sequence of decimal digits." (C standard 6.4.4.1)
61   else {
62     return 10;
63   }
64 }
65 
66 // Takes a pointer to a string, a pointer to a string pointer, and the base to
67 // convert to. This function is used as the backend for all of the string to int
68 // functions.
69 template <class T>
strtointeger(const char * __restrict src,char ** __restrict str_end,int base)70 static inline T strtointeger(const char *__restrict src,
71                              char **__restrict str_end, int base) {
72   unsigned long long result = 0;
73   bool is_number = false;
74   const char *original_src = src;
75 
76   if (base < 0 || base == 1 || base > 36) {
77     errno = EINVAL;
78     return 0;
79   }
80 
81   src = first_non_whitespace(src);
82 
83   char result_sign = '+';
84   if (*src == '+' || *src == '-') {
85     result_sign = *src;
86     ++src;
87   }
88 
89   if (base == 0) {
90     base = infer_base(&src);
91   } else if (base == 16 && is_hex_start(src)) {
92     src = src + 2;
93   }
94 
95   constexpr bool IS_UNSIGNED = (__llvm_libc::cpp::NumericLimits<T>::min() == 0);
96   const bool is_positive = (result_sign == '+');
97   unsigned long long constexpr NEGATIVE_MAX =
98       !IS_UNSIGNED ? static_cast<unsigned long long>(
99                          __llvm_libc::cpp::NumericLimits<T>::max()) +
100                          1
101                    : __llvm_libc::cpp::NumericLimits<T>::max();
102   unsigned long long const abs_max =
103       (is_positive ? __llvm_libc::cpp::NumericLimits<T>::max() : NEGATIVE_MAX);
104   unsigned long long const abs_max_div_by_base = abs_max / base;
105   while (isalnum(*src)) {
106     int cur_digit = b36_char_to_int(*src);
107     if (cur_digit >= base)
108       break;
109 
110     is_number = true;
111     ++src;
112 
113     // If the number has already hit the maximum value for the current type then
114     // the result cannot change, but we still need to advance src to the end of
115     // the number.
116     if (result == abs_max) {
117       errno = ERANGE;
118       continue;
119     }
120 
121     if (result > abs_max_div_by_base) {
122       result = abs_max;
123       errno = ERANGE;
124     } else {
125       result = result * base;
126     }
127     if (result > abs_max - cur_digit) {
128       result = abs_max;
129       errno = ERANGE;
130     } else {
131       result = result + cur_digit;
132     }
133   }
134 
135   if (str_end != nullptr)
136     *str_end = const_cast<char *>(is_number ? src : original_src);
137 
138   if (result == abs_max) {
139     if (is_positive || IS_UNSIGNED)
140       return __llvm_libc::cpp::NumericLimits<T>::max();
141     else // T is signed and there is a negative overflow
142       return __llvm_libc::cpp::NumericLimits<T>::min();
143   }
144 
145   return is_positive ? static_cast<T>(result) : -static_cast<T>(result);
146 }
147 
148 } // namespace internal
149 } // namespace __llvm_libc
150 
151 #endif // LIBC_SRC_SUPPORT_STR_TO_INTEGER_H
152