1 //===-- APInt.cpp - Implement APInt class ---------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a class to represent arbitrary precision integer
10 // constant values and provide a variety of arithmetic operations on them.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/ADT/APInt.h"
15 #include "llvm/ADT/ArrayRef.h"
16 #include "llvm/ADT/FoldingSet.h"
17 #include "llvm/ADT/Hashing.h"
18 #include "llvm/ADT/Optional.h"
19 #include "llvm/ADT/SmallString.h"
20 #include "llvm/ADT/StringRef.h"
21 #include "llvm/ADT/bit.h"
22 #include "llvm/Config/llvm-config.h"
23 #include "llvm/Support/Debug.h"
24 #include "llvm/Support/ErrorHandling.h"
25 #include "llvm/Support/MathExtras.h"
26 #include "llvm/Support/raw_ostream.h"
27 #include <climits>
28 #include <cmath>
29 #include <cstdlib>
30 #include <cstring>
31 using namespace llvm;
32 
33 #define DEBUG_TYPE "apint"
34 
35 /// A utility function for allocating memory, checking for allocation failures,
36 /// and ensuring the contents are zeroed.
37 inline static uint64_t* getClearedMemory(unsigned numWords) {
38   uint64_t *result = new uint64_t[numWords];
39   memset(result, 0, numWords * sizeof(uint64_t));
40   return result;
41 }
42 
43 /// A utility function for allocating memory and checking for allocation
44 /// failure.  The content is not zeroed.
45 inline static uint64_t* getMemory(unsigned numWords) {
46   return new uint64_t[numWords];
47 }
48 
49 /// A utility function that converts a character to a digit.
50 inline static unsigned getDigit(char cdigit, uint8_t radix) {
51   unsigned r;
52 
53   if (radix == 16 || radix == 36) {
54     r = cdigit - '0';
55     if (r <= 9)
56       return r;
57 
58     r = cdigit - 'A';
59     if (r <= radix - 11U)
60       return r + 10;
61 
62     r = cdigit - 'a';
63     if (r <= radix - 11U)
64       return r + 10;
65 
66     radix = 10;
67   }
68 
69   r = cdigit - '0';
70   if (r < radix)
71     return r;
72 
73   return -1U;
74 }
75 
76 
77 void APInt::initSlowCase(uint64_t val, bool isSigned) {
78   U.pVal = getClearedMemory(getNumWords());
79   U.pVal[0] = val;
80   if (isSigned && int64_t(val) < 0)
81     for (unsigned i = 1; i < getNumWords(); ++i)
82       U.pVal[i] = WORDTYPE_MAX;
83   clearUnusedBits();
84 }
85 
86 void APInt::initSlowCase(const APInt& that) {
87   U.pVal = getMemory(getNumWords());
88   memcpy(U.pVal, that.U.pVal, getNumWords() * APINT_WORD_SIZE);
89 }
90 
91 void APInt::initFromArray(ArrayRef<uint64_t> bigVal) {
92   assert(BitWidth && "Bitwidth too small");
93   assert(bigVal.data() && "Null pointer detected!");
94   if (isSingleWord())
95     U.VAL = bigVal[0];
96   else {
97     // Get memory, cleared to 0
98     U.pVal = getClearedMemory(getNumWords());
99     // Calculate the number of words to copy
100     unsigned words = std::min<unsigned>(bigVal.size(), getNumWords());
101     // Copy the words from bigVal to pVal
102     memcpy(U.pVal, bigVal.data(), words * APINT_WORD_SIZE);
103   }
104   // Make sure unused high bits are cleared
105   clearUnusedBits();
106 }
107 
108 APInt::APInt(unsigned numBits, ArrayRef<uint64_t> bigVal)
109   : BitWidth(numBits) {
110   initFromArray(bigVal);
111 }
112 
113 APInt::APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[])
114   : BitWidth(numBits) {
115   initFromArray(makeArrayRef(bigVal, numWords));
116 }
117 
118 APInt::APInt(unsigned numbits, StringRef Str, uint8_t radix)
119   : BitWidth(numbits) {
120   assert(BitWidth && "Bitwidth too small");
121   fromString(numbits, Str, radix);
122 }
123 
124 void APInt::reallocate(unsigned NewBitWidth) {
125   // If the number of words is the same we can just change the width and stop.
126   if (getNumWords() == getNumWords(NewBitWidth)) {
127     BitWidth = NewBitWidth;
128     return;
129   }
130 
131   // If we have an allocation, delete it.
132   if (!isSingleWord())
133     delete [] U.pVal;
134 
135   // Update BitWidth.
136   BitWidth = NewBitWidth;
137 
138   // If we are supposed to have an allocation, create it.
139   if (!isSingleWord())
140     U.pVal = getMemory(getNumWords());
141 }
142 
143 void APInt::AssignSlowCase(const APInt& RHS) {
144   // Don't do anything for X = X
145   if (this == &RHS)
146     return;
147 
148   // Adjust the bit width and handle allocations as necessary.
149   reallocate(RHS.getBitWidth());
150 
151   // Copy the data.
152   if (isSingleWord())
153     U.VAL = RHS.U.VAL;
154   else
155     memcpy(U.pVal, RHS.U.pVal, getNumWords() * APINT_WORD_SIZE);
156 }
157 
158 /// This method 'profiles' an APInt for use with FoldingSet.
159 void APInt::Profile(FoldingSetNodeID& ID) const {
160   ID.AddInteger(BitWidth);
161 
162   if (isSingleWord()) {
163     ID.AddInteger(U.VAL);
164     return;
165   }
166 
167   unsigned NumWords = getNumWords();
168   for (unsigned i = 0; i < NumWords; ++i)
169     ID.AddInteger(U.pVal[i]);
170 }
171 
172 /// Prefix increment operator. Increments the APInt by one.
173 APInt& APInt::operator++() {
174   if (isSingleWord())
175     ++U.VAL;
176   else
177     tcIncrement(U.pVal, getNumWords());
178   return clearUnusedBits();
179 }
180 
181 /// Prefix decrement operator. Decrements the APInt by one.
182 APInt& APInt::operator--() {
183   if (isSingleWord())
184     --U.VAL;
185   else
186     tcDecrement(U.pVal, getNumWords());
187   return clearUnusedBits();
188 }
189 
190 /// Adds the RHS APInt to this APInt.
191 /// @returns this, after addition of RHS.
192 /// Addition assignment operator.
193 APInt& APInt::operator+=(const APInt& RHS) {
194   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
195   if (isSingleWord())
196     U.VAL += RHS.U.VAL;
197   else
198     tcAdd(U.pVal, RHS.U.pVal, 0, getNumWords());
199   return clearUnusedBits();
200 }
201 
202 APInt& APInt::operator+=(uint64_t RHS) {
203   if (isSingleWord())
204     U.VAL += RHS;
205   else
206     tcAddPart(U.pVal, RHS, getNumWords());
207   return clearUnusedBits();
208 }
209 
210 /// Subtracts the RHS APInt from this APInt
211 /// @returns this, after subtraction
212 /// Subtraction assignment operator.
213 APInt& APInt::operator-=(const APInt& RHS) {
214   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
215   if (isSingleWord())
216     U.VAL -= RHS.U.VAL;
217   else
218     tcSubtract(U.pVal, RHS.U.pVal, 0, getNumWords());
219   return clearUnusedBits();
220 }
221 
222 APInt& APInt::operator-=(uint64_t RHS) {
223   if (isSingleWord())
224     U.VAL -= RHS;
225   else
226     tcSubtractPart(U.pVal, RHS, getNumWords());
227   return clearUnusedBits();
228 }
229 
230 APInt APInt::operator*(const APInt& RHS) const {
231   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
232   if (isSingleWord())
233     return APInt(BitWidth, U.VAL * RHS.U.VAL);
234 
235   APInt Result(getMemory(getNumWords()), getBitWidth());
236 
237   tcMultiply(Result.U.pVal, U.pVal, RHS.U.pVal, getNumWords());
238 
239   Result.clearUnusedBits();
240   return Result;
241 }
242 
243 void APInt::AndAssignSlowCase(const APInt& RHS) {
244   tcAnd(U.pVal, RHS.U.pVal, getNumWords());
245 }
246 
247 void APInt::OrAssignSlowCase(const APInt& RHS) {
248   tcOr(U.pVal, RHS.U.pVal, getNumWords());
249 }
250 
251 void APInt::XorAssignSlowCase(const APInt& RHS) {
252   tcXor(U.pVal, RHS.U.pVal, getNumWords());
253 }
254 
255 APInt& APInt::operator*=(const APInt& RHS) {
256   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
257   *this = *this * RHS;
258   return *this;
259 }
260 
261 APInt& APInt::operator*=(uint64_t RHS) {
262   if (isSingleWord()) {
263     U.VAL *= RHS;
264   } else {
265     unsigned NumWords = getNumWords();
266     tcMultiplyPart(U.pVal, U.pVal, RHS, 0, NumWords, NumWords, false);
267   }
268   return clearUnusedBits();
269 }
270 
271 bool APInt::EqualSlowCase(const APInt& RHS) const {
272   return std::equal(U.pVal, U.pVal + getNumWords(), RHS.U.pVal);
273 }
274 
275 int APInt::compare(const APInt& RHS) const {
276   assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
277   if (isSingleWord())
278     return U.VAL < RHS.U.VAL ? -1 : U.VAL > RHS.U.VAL;
279 
280   return tcCompare(U.pVal, RHS.U.pVal, getNumWords());
281 }
282 
283 int APInt::compareSigned(const APInt& RHS) const {
284   assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
285   if (isSingleWord()) {
286     int64_t lhsSext = SignExtend64(U.VAL, BitWidth);
287     int64_t rhsSext = SignExtend64(RHS.U.VAL, BitWidth);
288     return lhsSext < rhsSext ? -1 : lhsSext > rhsSext;
289   }
290 
291   bool lhsNeg = isNegative();
292   bool rhsNeg = RHS.isNegative();
293 
294   // If the sign bits don't match, then (LHS < RHS) if LHS is negative
295   if (lhsNeg != rhsNeg)
296     return lhsNeg ? -1 : 1;
297 
298   // Otherwise we can just use an unsigned comparison, because even negative
299   // numbers compare correctly this way if both have the same signed-ness.
300   return tcCompare(U.pVal, RHS.U.pVal, getNumWords());
301 }
302 
303 void APInt::setBitsSlowCase(unsigned loBit, unsigned hiBit) {
304   unsigned loWord = whichWord(loBit);
305   unsigned hiWord = whichWord(hiBit);
306 
307   // Create an initial mask for the low word with zeros below loBit.
308   uint64_t loMask = WORDTYPE_MAX << whichBit(loBit);
309 
310   // If hiBit is not aligned, we need a high mask.
311   unsigned hiShiftAmt = whichBit(hiBit);
312   if (hiShiftAmt != 0) {
313     // Create a high mask with zeros above hiBit.
314     uint64_t hiMask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - hiShiftAmt);
315     // If loWord and hiWord are equal, then we combine the masks. Otherwise,
316     // set the bits in hiWord.
317     if (hiWord == loWord)
318       loMask &= hiMask;
319     else
320       U.pVal[hiWord] |= hiMask;
321   }
322   // Apply the mask to the low word.
323   U.pVal[loWord] |= loMask;
324 
325   // Fill any words between loWord and hiWord with all ones.
326   for (unsigned word = loWord + 1; word < hiWord; ++word)
327     U.pVal[word] = WORDTYPE_MAX;
328 }
329 
330 /// Toggle every bit to its opposite value.
331 void APInt::flipAllBitsSlowCase() {
332   tcComplement(U.pVal, getNumWords());
333   clearUnusedBits();
334 }
335 
336 /// Toggle a given bit to its opposite value whose position is given
337 /// as "bitPosition".
338 /// Toggles a given bit to its opposite value.
339 void APInt::flipBit(unsigned bitPosition) {
340   assert(bitPosition < BitWidth && "Out of the bit-width range!");
341   setBitVal(bitPosition, !(*this)[bitPosition]);
342 }
343 
344 void APInt::insertBits(const APInt &subBits, unsigned bitPosition) {
345   unsigned subBitWidth = subBits.getBitWidth();
346   assert(0 < subBitWidth && (subBitWidth + bitPosition) <= BitWidth &&
347          "Illegal bit insertion");
348 
349   // Insertion is a direct copy.
350   if (subBitWidth == BitWidth) {
351     *this = subBits;
352     return;
353   }
354 
355   // Single word result can be done as a direct bitmask.
356   if (isSingleWord()) {
357     uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - subBitWidth);
358     U.VAL &= ~(mask << bitPosition);
359     U.VAL |= (subBits.U.VAL << bitPosition);
360     return;
361   }
362 
363   unsigned loBit = whichBit(bitPosition);
364   unsigned loWord = whichWord(bitPosition);
365   unsigned hi1Word = whichWord(bitPosition + subBitWidth - 1);
366 
367   // Insertion within a single word can be done as a direct bitmask.
368   if (loWord == hi1Word) {
369     uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - subBitWidth);
370     U.pVal[loWord] &= ~(mask << loBit);
371     U.pVal[loWord] |= (subBits.U.VAL << loBit);
372     return;
373   }
374 
375   // Insert on word boundaries.
376   if (loBit == 0) {
377     // Direct copy whole words.
378     unsigned numWholeSubWords = subBitWidth / APINT_BITS_PER_WORD;
379     memcpy(U.pVal + loWord, subBits.getRawData(),
380            numWholeSubWords * APINT_WORD_SIZE);
381 
382     // Mask+insert remaining bits.
383     unsigned remainingBits = subBitWidth % APINT_BITS_PER_WORD;
384     if (remainingBits != 0) {
385       uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - remainingBits);
386       U.pVal[hi1Word] &= ~mask;
387       U.pVal[hi1Word] |= subBits.getWord(subBitWidth - 1);
388     }
389     return;
390   }
391 
392   // General case - set/clear individual bits in dst based on src.
393   // TODO - there is scope for optimization here, but at the moment this code
394   // path is barely used so prefer readability over performance.
395   for (unsigned i = 0; i != subBitWidth; ++i)
396     setBitVal(bitPosition + i, subBits[i]);
397 }
398 
399 void APInt::insertBits(uint64_t subBits, unsigned bitPosition, unsigned numBits) {
400   uint64_t maskBits = maskTrailingOnes<uint64_t>(numBits);
401   subBits &= maskBits;
402   if (isSingleWord()) {
403     U.VAL &= ~(maskBits << bitPosition);
404     U.VAL |= subBits << bitPosition;
405     return;
406   }
407 
408   unsigned loBit = whichBit(bitPosition);
409   unsigned loWord = whichWord(bitPosition);
410   unsigned hiWord = whichWord(bitPosition + numBits - 1);
411   if (loWord == hiWord) {
412     U.pVal[loWord] &= ~(maskBits << loBit);
413     U.pVal[loWord] |= subBits << loBit;
414     return;
415   }
416 
417   static_assert(8 * sizeof(WordType) <= 64, "This code assumes only two words affected");
418   unsigned wordBits = 8 * sizeof(WordType);
419   U.pVal[loWord] &= ~(maskBits << loBit);
420   U.pVal[loWord] |= subBits << loBit;
421 
422   U.pVal[hiWord] &= ~(maskBits >> (wordBits - loBit));
423   U.pVal[hiWord] |= subBits >> (wordBits - loBit);
424 }
425 
426 APInt APInt::extractBits(unsigned numBits, unsigned bitPosition) const {
427   assert(numBits > 0 && "Can't extract zero bits");
428   assert(bitPosition < BitWidth && (numBits + bitPosition) <= BitWidth &&
429          "Illegal bit extraction");
430 
431   if (isSingleWord())
432     return APInt(numBits, U.VAL >> bitPosition);
433 
434   unsigned loBit = whichBit(bitPosition);
435   unsigned loWord = whichWord(bitPosition);
436   unsigned hiWord = whichWord(bitPosition + numBits - 1);
437 
438   // Single word result extracting bits from a single word source.
439   if (loWord == hiWord)
440     return APInt(numBits, U.pVal[loWord] >> loBit);
441 
442   // Extracting bits that start on a source word boundary can be done
443   // as a fast memory copy.
444   if (loBit == 0)
445     return APInt(numBits, makeArrayRef(U.pVal + loWord, 1 + hiWord - loWord));
446 
447   // General case - shift + copy source words directly into place.
448   APInt Result(numBits, 0);
449   unsigned NumSrcWords = getNumWords();
450   unsigned NumDstWords = Result.getNumWords();
451 
452   uint64_t *DestPtr = Result.isSingleWord() ? &Result.U.VAL : Result.U.pVal;
453   for (unsigned word = 0; word < NumDstWords; ++word) {
454     uint64_t w0 = U.pVal[loWord + word];
455     uint64_t w1 =
456         (loWord + word + 1) < NumSrcWords ? U.pVal[loWord + word + 1] : 0;
457     DestPtr[word] = (w0 >> loBit) | (w1 << (APINT_BITS_PER_WORD - loBit));
458   }
459 
460   return Result.clearUnusedBits();
461 }
462 
463 uint64_t APInt::extractBitsAsZExtValue(unsigned numBits,
464                                        unsigned bitPosition) const {
465   assert(numBits > 0 && "Can't extract zero bits");
466   assert(bitPosition < BitWidth && (numBits + bitPosition) <= BitWidth &&
467          "Illegal bit extraction");
468   assert(numBits <= 64 && "Illegal bit extraction");
469 
470   uint64_t maskBits = maskTrailingOnes<uint64_t>(numBits);
471   if (isSingleWord())
472     return (U.VAL >> bitPosition) & maskBits;
473 
474   unsigned loBit = whichBit(bitPosition);
475   unsigned loWord = whichWord(bitPosition);
476   unsigned hiWord = whichWord(bitPosition + numBits - 1);
477   if (loWord == hiWord)
478     return (U.pVal[loWord] >> loBit) & maskBits;
479 
480   static_assert(8 * sizeof(WordType) <= 64, "This code assumes only two words affected");
481   unsigned wordBits = 8 * sizeof(WordType);
482   uint64_t retBits = U.pVal[loWord] >> loBit;
483   retBits |= U.pVal[hiWord] << (wordBits - loBit);
484   retBits &= maskBits;
485   return retBits;
486 }
487 
488 unsigned APInt::getBitsNeeded(StringRef str, uint8_t radix) {
489   assert(!str.empty() && "Invalid string length");
490   assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 ||
491           radix == 36) &&
492          "Radix should be 2, 8, 10, 16, or 36!");
493 
494   size_t slen = str.size();
495 
496   // Each computation below needs to know if it's negative.
497   StringRef::iterator p = str.begin();
498   unsigned isNegative = *p == '-';
499   if (*p == '-' || *p == '+') {
500     p++;
501     slen--;
502     assert(slen && "String is only a sign, needs a value.");
503   }
504 
505   // For radixes of power-of-two values, the bits required is accurately and
506   // easily computed
507   if (radix == 2)
508     return slen + isNegative;
509   if (radix == 8)
510     return slen * 3 + isNegative;
511   if (radix == 16)
512     return slen * 4 + isNegative;
513 
514   // FIXME: base 36
515 
516   // This is grossly inefficient but accurate. We could probably do something
517   // with a computation of roughly slen*64/20 and then adjust by the value of
518   // the first few digits. But, I'm not sure how accurate that could be.
519 
520   // Compute a sufficient number of bits that is always large enough but might
521   // be too large. This avoids the assertion in the constructor. This
522   // calculation doesn't work appropriately for the numbers 0-9, so just use 4
523   // bits in that case.
524   unsigned sufficient
525     = radix == 10? (slen == 1 ? 4 : slen * 64/18)
526                  : (slen == 1 ? 7 : slen * 16/3);
527 
528   // Convert to the actual binary value.
529   APInt tmp(sufficient, StringRef(p, slen), radix);
530 
531   // Compute how many bits are required. If the log is infinite, assume we need
532   // just bit. If the log is exact and value is negative, then the value is
533   // MinSignedValue with (log + 1) bits.
534   unsigned log = tmp.logBase2();
535   if (log == (unsigned)-1) {
536     return isNegative + 1;
537   } else if (isNegative && tmp.isPowerOf2()) {
538     return isNegative + log;
539   } else {
540     return isNegative + log + 1;
541   }
542 }
543 
544 hash_code llvm::hash_value(const APInt &Arg) {
545   if (Arg.isSingleWord())
546     return hash_combine(Arg.BitWidth, Arg.U.VAL);
547 
548   return hash_combine(
549       Arg.BitWidth,
550       hash_combine_range(Arg.U.pVal, Arg.U.pVal + Arg.getNumWords()));
551 }
552 
553 unsigned DenseMapInfo<APInt>::getHashValue(const APInt &Key) {
554   return static_cast<unsigned>(hash_value(Key));
555 }
556 
557 bool APInt::isSplat(unsigned SplatSizeInBits) const {
558   assert(getBitWidth() % SplatSizeInBits == 0 &&
559          "SplatSizeInBits must divide width!");
560   // We can check that all parts of an integer are equal by making use of a
561   // little trick: rotate and check if it's still the same value.
562   return *this == rotl(SplatSizeInBits);
563 }
564 
565 /// This function returns the high "numBits" bits of this APInt.
566 APInt APInt::getHiBits(unsigned numBits) const {
567   return this->lshr(BitWidth - numBits);
568 }
569 
570 /// This function returns the low "numBits" bits of this APInt.
571 APInt APInt::getLoBits(unsigned numBits) const {
572   APInt Result(getLowBitsSet(BitWidth, numBits));
573   Result &= *this;
574   return Result;
575 }
576 
577 /// Return a value containing V broadcasted over NewLen bits.
578 APInt APInt::getSplat(unsigned NewLen, const APInt &V) {
579   assert(NewLen >= V.getBitWidth() && "Can't splat to smaller bit width!");
580 
581   APInt Val = V.zextOrSelf(NewLen);
582   for (unsigned I = V.getBitWidth(); I < NewLen; I <<= 1)
583     Val |= Val << I;
584 
585   return Val;
586 }
587 
588 unsigned APInt::countLeadingZerosSlowCase() const {
589   unsigned Count = 0;
590   for (int i = getNumWords()-1; i >= 0; --i) {
591     uint64_t V = U.pVal[i];
592     if (V == 0)
593       Count += APINT_BITS_PER_WORD;
594     else {
595       Count += llvm::countLeadingZeros(V);
596       break;
597     }
598   }
599   // Adjust for unused bits in the most significant word (they are zero).
600   unsigned Mod = BitWidth % APINT_BITS_PER_WORD;
601   Count -= Mod > 0 ? APINT_BITS_PER_WORD - Mod : 0;
602   return Count;
603 }
604 
605 unsigned APInt::countLeadingOnesSlowCase() const {
606   unsigned highWordBits = BitWidth % APINT_BITS_PER_WORD;
607   unsigned shift;
608   if (!highWordBits) {
609     highWordBits = APINT_BITS_PER_WORD;
610     shift = 0;
611   } else {
612     shift = APINT_BITS_PER_WORD - highWordBits;
613   }
614   int i = getNumWords() - 1;
615   unsigned Count = llvm::countLeadingOnes(U.pVal[i] << shift);
616   if (Count == highWordBits) {
617     for (i--; i >= 0; --i) {
618       if (U.pVal[i] == WORDTYPE_MAX)
619         Count += APINT_BITS_PER_WORD;
620       else {
621         Count += llvm::countLeadingOnes(U.pVal[i]);
622         break;
623       }
624     }
625   }
626   return Count;
627 }
628 
629 unsigned APInt::countTrailingZerosSlowCase() const {
630   unsigned Count = 0;
631   unsigned i = 0;
632   for (; i < getNumWords() && U.pVal[i] == 0; ++i)
633     Count += APINT_BITS_PER_WORD;
634   if (i < getNumWords())
635     Count += llvm::countTrailingZeros(U.pVal[i]);
636   return std::min(Count, BitWidth);
637 }
638 
639 unsigned APInt::countTrailingOnesSlowCase() const {
640   unsigned Count = 0;
641   unsigned i = 0;
642   for (; i < getNumWords() && U.pVal[i] == WORDTYPE_MAX; ++i)
643     Count += APINT_BITS_PER_WORD;
644   if (i < getNumWords())
645     Count += llvm::countTrailingOnes(U.pVal[i]);
646   assert(Count <= BitWidth);
647   return Count;
648 }
649 
650 unsigned APInt::countPopulationSlowCase() const {
651   unsigned Count = 0;
652   for (unsigned i = 0; i < getNumWords(); ++i)
653     Count += llvm::countPopulation(U.pVal[i]);
654   return Count;
655 }
656 
657 bool APInt::intersectsSlowCase(const APInt &RHS) const {
658   for (unsigned i = 0, e = getNumWords(); i != e; ++i)
659     if ((U.pVal[i] & RHS.U.pVal[i]) != 0)
660       return true;
661 
662   return false;
663 }
664 
665 bool APInt::isSubsetOfSlowCase(const APInt &RHS) const {
666   for (unsigned i = 0, e = getNumWords(); i != e; ++i)
667     if ((U.pVal[i] & ~RHS.U.pVal[i]) != 0)
668       return false;
669 
670   return true;
671 }
672 
673 APInt APInt::byteSwap() const {
674   assert(BitWidth >= 16 && BitWidth % 8 == 0 && "Cannot byteswap!");
675   if (BitWidth == 16)
676     return APInt(BitWidth, ByteSwap_16(uint16_t(U.VAL)));
677   if (BitWidth == 32)
678     return APInt(BitWidth, ByteSwap_32(unsigned(U.VAL)));
679   if (BitWidth <= 64) {
680     uint64_t Tmp1 = ByteSwap_64(U.VAL);
681     Tmp1 >>= (64 - BitWidth);
682     return APInt(BitWidth, Tmp1);
683   }
684 
685   APInt Result(getNumWords() * APINT_BITS_PER_WORD, 0);
686   for (unsigned I = 0, N = getNumWords(); I != N; ++I)
687     Result.U.pVal[I] = ByteSwap_64(U.pVal[N - I - 1]);
688   if (Result.BitWidth != BitWidth) {
689     Result.lshrInPlace(Result.BitWidth - BitWidth);
690     Result.BitWidth = BitWidth;
691   }
692   return Result;
693 }
694 
695 APInt APInt::reverseBits() const {
696   switch (BitWidth) {
697   case 64:
698     return APInt(BitWidth, llvm::reverseBits<uint64_t>(U.VAL));
699   case 32:
700     return APInt(BitWidth, llvm::reverseBits<uint32_t>(U.VAL));
701   case 16:
702     return APInt(BitWidth, llvm::reverseBits<uint16_t>(U.VAL));
703   case 8:
704     return APInt(BitWidth, llvm::reverseBits<uint8_t>(U.VAL));
705   default:
706     break;
707   }
708 
709   APInt Val(*this);
710   APInt Reversed(BitWidth, 0);
711   unsigned S = BitWidth;
712 
713   for (; Val != 0; Val.lshrInPlace(1)) {
714     Reversed <<= 1;
715     Reversed |= Val[0];
716     --S;
717   }
718 
719   Reversed <<= S;
720   return Reversed;
721 }
722 
723 APInt llvm::APIntOps::GreatestCommonDivisor(APInt A, APInt B) {
724   // Fast-path a common case.
725   if (A == B) return A;
726 
727   // Corner cases: if either operand is zero, the other is the gcd.
728   if (!A) return B;
729   if (!B) return A;
730 
731   // Count common powers of 2 and remove all other powers of 2.
732   unsigned Pow2;
733   {
734     unsigned Pow2_A = A.countTrailingZeros();
735     unsigned Pow2_B = B.countTrailingZeros();
736     if (Pow2_A > Pow2_B) {
737       A.lshrInPlace(Pow2_A - Pow2_B);
738       Pow2 = Pow2_B;
739     } else if (Pow2_B > Pow2_A) {
740       B.lshrInPlace(Pow2_B - Pow2_A);
741       Pow2 = Pow2_A;
742     } else {
743       Pow2 = Pow2_A;
744     }
745   }
746 
747   // Both operands are odd multiples of 2^Pow_2:
748   //
749   //   gcd(a, b) = gcd(|a - b| / 2^i, min(a, b))
750   //
751   // This is a modified version of Stein's algorithm, taking advantage of
752   // efficient countTrailingZeros().
753   while (A != B) {
754     if (A.ugt(B)) {
755       A -= B;
756       A.lshrInPlace(A.countTrailingZeros() - Pow2);
757     } else {
758       B -= A;
759       B.lshrInPlace(B.countTrailingZeros() - Pow2);
760     }
761   }
762 
763   return A;
764 }
765 
766 APInt llvm::APIntOps::RoundDoubleToAPInt(double Double, unsigned width) {
767   uint64_t I = bit_cast<uint64_t>(Double);
768 
769   // Get the sign bit from the highest order bit
770   bool isNeg = I >> 63;
771 
772   // Get the 11-bit exponent and adjust for the 1023 bit bias
773   int64_t exp = ((I >> 52) & 0x7ff) - 1023;
774 
775   // If the exponent is negative, the value is < 0 so just return 0.
776   if (exp < 0)
777     return APInt(width, 0u);
778 
779   // Extract the mantissa by clearing the top 12 bits (sign + exponent).
780   uint64_t mantissa = (I & (~0ULL >> 12)) | 1ULL << 52;
781 
782   // If the exponent doesn't shift all bits out of the mantissa
783   if (exp < 52)
784     return isNeg ? -APInt(width, mantissa >> (52 - exp)) :
785                     APInt(width, mantissa >> (52 - exp));
786 
787   // If the client didn't provide enough bits for us to shift the mantissa into
788   // then the result is undefined, just return 0
789   if (width <= exp - 52)
790     return APInt(width, 0);
791 
792   // Otherwise, we have to shift the mantissa bits up to the right location
793   APInt Tmp(width, mantissa);
794   Tmp <<= (unsigned)exp - 52;
795   return isNeg ? -Tmp : Tmp;
796 }
797 
798 /// This function converts this APInt to a double.
799 /// The layout for double is as following (IEEE Standard 754):
800 ///  --------------------------------------
801 /// |  Sign    Exponent    Fraction    Bias |
802 /// |-------------------------------------- |
803 /// |  1[63]   11[62-52]   52[51-00]   1023 |
804 ///  --------------------------------------
805 double APInt::roundToDouble(bool isSigned) const {
806 
807   // Handle the simple case where the value is contained in one uint64_t.
808   // It is wrong to optimize getWord(0) to VAL; there might be more than one word.
809   if (isSingleWord() || getActiveBits() <= APINT_BITS_PER_WORD) {
810     if (isSigned) {
811       int64_t sext = SignExtend64(getWord(0), BitWidth);
812       return double(sext);
813     } else
814       return double(getWord(0));
815   }
816 
817   // Determine if the value is negative.
818   bool isNeg = isSigned ? (*this)[BitWidth-1] : false;
819 
820   // Construct the absolute value if we're negative.
821   APInt Tmp(isNeg ? -(*this) : (*this));
822 
823   // Figure out how many bits we're using.
824   unsigned n = Tmp.getActiveBits();
825 
826   // The exponent (without bias normalization) is just the number of bits
827   // we are using. Note that the sign bit is gone since we constructed the
828   // absolute value.
829   uint64_t exp = n;
830 
831   // Return infinity for exponent overflow
832   if (exp > 1023) {
833     if (!isSigned || !isNeg)
834       return std::numeric_limits<double>::infinity();
835     else
836       return -std::numeric_limits<double>::infinity();
837   }
838   exp += 1023; // Increment for 1023 bias
839 
840   // Number of bits in mantissa is 52. To obtain the mantissa value, we must
841   // extract the high 52 bits from the correct words in pVal.
842   uint64_t mantissa;
843   unsigned hiWord = whichWord(n-1);
844   if (hiWord == 0) {
845     mantissa = Tmp.U.pVal[0];
846     if (n > 52)
847       mantissa >>= n - 52; // shift down, we want the top 52 bits.
848   } else {
849     assert(hiWord > 0 && "huh?");
850     uint64_t hibits = Tmp.U.pVal[hiWord] << (52 - n % APINT_BITS_PER_WORD);
851     uint64_t lobits = Tmp.U.pVal[hiWord-1] >> (11 + n % APINT_BITS_PER_WORD);
852     mantissa = hibits | lobits;
853   }
854 
855   // The leading bit of mantissa is implicit, so get rid of it.
856   uint64_t sign = isNeg ? (1ULL << (APINT_BITS_PER_WORD - 1)) : 0;
857   uint64_t I = sign | (exp << 52) | mantissa;
858   return bit_cast<double>(I);
859 }
860 
861 // Truncate to new width.
862 APInt APInt::trunc(unsigned width) const {
863   assert(width < BitWidth && "Invalid APInt Truncate request");
864   assert(width && "Can't truncate to 0 bits");
865 
866   if (width <= APINT_BITS_PER_WORD)
867     return APInt(width, getRawData()[0]);
868 
869   APInt Result(getMemory(getNumWords(width)), width);
870 
871   // Copy full words.
872   unsigned i;
873   for (i = 0; i != width / APINT_BITS_PER_WORD; i++)
874     Result.U.pVal[i] = U.pVal[i];
875 
876   // Truncate and copy any partial word.
877   unsigned bits = (0 - width) % APINT_BITS_PER_WORD;
878   if (bits != 0)
879     Result.U.pVal[i] = U.pVal[i] << bits >> bits;
880 
881   return Result;
882 }
883 
884 // Truncate to new width with unsigned saturation.
885 APInt APInt::truncUSat(unsigned width) const {
886   assert(width < BitWidth && "Invalid APInt Truncate request");
887   assert(width && "Can't truncate to 0 bits");
888 
889   // Can we just losslessly truncate it?
890   if (isIntN(width))
891     return trunc(width);
892   // If not, then just return the new limit.
893   return APInt::getMaxValue(width);
894 }
895 
896 // Truncate to new width with signed saturation.
897 APInt APInt::truncSSat(unsigned width) const {
898   assert(width < BitWidth && "Invalid APInt Truncate request");
899   assert(width && "Can't truncate to 0 bits");
900 
901   // Can we just losslessly truncate it?
902   if (isSignedIntN(width))
903     return trunc(width);
904   // If not, then just return the new limits.
905   return isNegative() ? APInt::getSignedMinValue(width)
906                       : APInt::getSignedMaxValue(width);
907 }
908 
909 // Sign extend to a new width.
910 APInt APInt::sext(unsigned Width) const {
911   assert(Width > BitWidth && "Invalid APInt SignExtend request");
912 
913   if (Width <= APINT_BITS_PER_WORD)
914     return APInt(Width, SignExtend64(U.VAL, BitWidth));
915 
916   APInt Result(getMemory(getNumWords(Width)), Width);
917 
918   // Copy words.
919   std::memcpy(Result.U.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE);
920 
921   // Sign extend the last word since there may be unused bits in the input.
922   Result.U.pVal[getNumWords() - 1] =
923       SignExtend64(Result.U.pVal[getNumWords() - 1],
924                    ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1);
925 
926   // Fill with sign bits.
927   std::memset(Result.U.pVal + getNumWords(), isNegative() ? -1 : 0,
928               (Result.getNumWords() - getNumWords()) * APINT_WORD_SIZE);
929   Result.clearUnusedBits();
930   return Result;
931 }
932 
933 //  Zero extend to a new width.
934 APInt APInt::zext(unsigned width) const {
935   assert(width > BitWidth && "Invalid APInt ZeroExtend request");
936 
937   if (width <= APINT_BITS_PER_WORD)
938     return APInt(width, U.VAL);
939 
940   APInt Result(getMemory(getNumWords(width)), width);
941 
942   // Copy words.
943   std::memcpy(Result.U.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE);
944 
945   // Zero remaining words.
946   std::memset(Result.U.pVal + getNumWords(), 0,
947               (Result.getNumWords() - getNumWords()) * APINT_WORD_SIZE);
948 
949   return Result;
950 }
951 
952 APInt APInt::zextOrTrunc(unsigned width) const {
953   if (BitWidth < width)
954     return zext(width);
955   if (BitWidth > width)
956     return trunc(width);
957   return *this;
958 }
959 
960 APInt APInt::sextOrTrunc(unsigned width) const {
961   if (BitWidth < width)
962     return sext(width);
963   if (BitWidth > width)
964     return trunc(width);
965   return *this;
966 }
967 
968 APInt APInt::truncOrSelf(unsigned width) const {
969   if (BitWidth > width)
970     return trunc(width);
971   return *this;
972 }
973 
974 APInt APInt::zextOrSelf(unsigned width) const {
975   if (BitWidth < width)
976     return zext(width);
977   return *this;
978 }
979 
980 APInt APInt::sextOrSelf(unsigned width) const {
981   if (BitWidth < width)
982     return sext(width);
983   return *this;
984 }
985 
986 /// Arithmetic right-shift this APInt by shiftAmt.
987 /// Arithmetic right-shift function.
988 void APInt::ashrInPlace(const APInt &shiftAmt) {
989   ashrInPlace((unsigned)shiftAmt.getLimitedValue(BitWidth));
990 }
991 
992 /// Arithmetic right-shift this APInt by shiftAmt.
993 /// Arithmetic right-shift function.
994 void APInt::ashrSlowCase(unsigned ShiftAmt) {
995   // Don't bother performing a no-op shift.
996   if (!ShiftAmt)
997     return;
998 
999   // Save the original sign bit for later.
1000   bool Negative = isNegative();
1001 
1002   // WordShift is the inter-part shift; BitShift is intra-part shift.
1003   unsigned WordShift = ShiftAmt / APINT_BITS_PER_WORD;
1004   unsigned BitShift = ShiftAmt % APINT_BITS_PER_WORD;
1005 
1006   unsigned WordsToMove = getNumWords() - WordShift;
1007   if (WordsToMove != 0) {
1008     // Sign extend the last word to fill in the unused bits.
1009     U.pVal[getNumWords() - 1] = SignExtend64(
1010         U.pVal[getNumWords() - 1], ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1);
1011 
1012     // Fastpath for moving by whole words.
1013     if (BitShift == 0) {
1014       std::memmove(U.pVal, U.pVal + WordShift, WordsToMove * APINT_WORD_SIZE);
1015     } else {
1016       // Move the words containing significant bits.
1017       for (unsigned i = 0; i != WordsToMove - 1; ++i)
1018         U.pVal[i] = (U.pVal[i + WordShift] >> BitShift) |
1019                     (U.pVal[i + WordShift + 1] << (APINT_BITS_PER_WORD - BitShift));
1020 
1021       // Handle the last word which has no high bits to copy.
1022       U.pVal[WordsToMove - 1] = U.pVal[WordShift + WordsToMove - 1] >> BitShift;
1023       // Sign extend one more time.
1024       U.pVal[WordsToMove - 1] =
1025           SignExtend64(U.pVal[WordsToMove - 1], APINT_BITS_PER_WORD - BitShift);
1026     }
1027   }
1028 
1029   // Fill in the remainder based on the original sign.
1030   std::memset(U.pVal + WordsToMove, Negative ? -1 : 0,
1031               WordShift * APINT_WORD_SIZE);
1032   clearUnusedBits();
1033 }
1034 
1035 /// Logical right-shift this APInt by shiftAmt.
1036 /// Logical right-shift function.
1037 void APInt::lshrInPlace(const APInt &shiftAmt) {
1038   lshrInPlace((unsigned)shiftAmt.getLimitedValue(BitWidth));
1039 }
1040 
1041 /// Logical right-shift this APInt by shiftAmt.
1042 /// Logical right-shift function.
1043 void APInt::lshrSlowCase(unsigned ShiftAmt) {
1044   tcShiftRight(U.pVal, getNumWords(), ShiftAmt);
1045 }
1046 
1047 /// Left-shift this APInt by shiftAmt.
1048 /// Left-shift function.
1049 APInt &APInt::operator<<=(const APInt &shiftAmt) {
1050   // It's undefined behavior in C to shift by BitWidth or greater.
1051   *this <<= (unsigned)shiftAmt.getLimitedValue(BitWidth);
1052   return *this;
1053 }
1054 
1055 void APInt::shlSlowCase(unsigned ShiftAmt) {
1056   tcShiftLeft(U.pVal, getNumWords(), ShiftAmt);
1057   clearUnusedBits();
1058 }
1059 
1060 // Calculate the rotate amount modulo the bit width.
1061 static unsigned rotateModulo(unsigned BitWidth, const APInt &rotateAmt) {
1062   unsigned rotBitWidth = rotateAmt.getBitWidth();
1063   APInt rot = rotateAmt;
1064   if (rotBitWidth < BitWidth) {
1065     // Extend the rotate APInt, so that the urem doesn't divide by 0.
1066     // e.g. APInt(1, 32) would give APInt(1, 0).
1067     rot = rotateAmt.zext(BitWidth);
1068   }
1069   rot = rot.urem(APInt(rot.getBitWidth(), BitWidth));
1070   return rot.getLimitedValue(BitWidth);
1071 }
1072 
1073 APInt APInt::rotl(const APInt &rotateAmt) const {
1074   return rotl(rotateModulo(BitWidth, rotateAmt));
1075 }
1076 
1077 APInt APInt::rotl(unsigned rotateAmt) const {
1078   rotateAmt %= BitWidth;
1079   if (rotateAmt == 0)
1080     return *this;
1081   return shl(rotateAmt) | lshr(BitWidth - rotateAmt);
1082 }
1083 
1084 APInt APInt::rotr(const APInt &rotateAmt) const {
1085   return rotr(rotateModulo(BitWidth, rotateAmt));
1086 }
1087 
1088 APInt APInt::rotr(unsigned rotateAmt) const {
1089   rotateAmt %= BitWidth;
1090   if (rotateAmt == 0)
1091     return *this;
1092   return lshr(rotateAmt) | shl(BitWidth - rotateAmt);
1093 }
1094 
1095 // Square Root - this method computes and returns the square root of "this".
1096 // Three mechanisms are used for computation. For small values (<= 5 bits),
1097 // a table lookup is done. This gets some performance for common cases. For
1098 // values using less than 52 bits, the value is converted to double and then
1099 // the libc sqrt function is called. The result is rounded and then converted
1100 // back to a uint64_t which is then used to construct the result. Finally,
1101 // the Babylonian method for computing square roots is used.
1102 APInt APInt::sqrt() const {
1103 
1104   // Determine the magnitude of the value.
1105   unsigned magnitude = getActiveBits();
1106 
1107   // Use a fast table for some small values. This also gets rid of some
1108   // rounding errors in libc sqrt for small values.
1109   if (magnitude <= 5) {
1110     static const uint8_t results[32] = {
1111       /*     0 */ 0,
1112       /*  1- 2 */ 1, 1,
1113       /*  3- 6 */ 2, 2, 2, 2,
1114       /*  7-12 */ 3, 3, 3, 3, 3, 3,
1115       /* 13-20 */ 4, 4, 4, 4, 4, 4, 4, 4,
1116       /* 21-30 */ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1117       /*    31 */ 6
1118     };
1119     return APInt(BitWidth, results[ (isSingleWord() ? U.VAL : U.pVal[0]) ]);
1120   }
1121 
1122   // If the magnitude of the value fits in less than 52 bits (the precision of
1123   // an IEEE double precision floating point value), then we can use the
1124   // libc sqrt function which will probably use a hardware sqrt computation.
1125   // This should be faster than the algorithm below.
1126   if (magnitude < 52) {
1127     return APInt(BitWidth,
1128                  uint64_t(::round(::sqrt(double(isSingleWord() ? U.VAL
1129                                                                : U.pVal[0])))));
1130   }
1131 
1132   // Okay, all the short cuts are exhausted. We must compute it. The following
1133   // is a classical Babylonian method for computing the square root. This code
1134   // was adapted to APInt from a wikipedia article on such computations.
1135   // See http://www.wikipedia.org/ and go to the page named
1136   // Calculate_an_integer_square_root.
1137   unsigned nbits = BitWidth, i = 4;
1138   APInt testy(BitWidth, 16);
1139   APInt x_old(BitWidth, 1);
1140   APInt x_new(BitWidth, 0);
1141   APInt two(BitWidth, 2);
1142 
1143   // Select a good starting value using binary logarithms.
1144   for (;; i += 2, testy = testy.shl(2))
1145     if (i >= nbits || this->ule(testy)) {
1146       x_old = x_old.shl(i / 2);
1147       break;
1148     }
1149 
1150   // Use the Babylonian method to arrive at the integer square root:
1151   for (;;) {
1152     x_new = (this->udiv(x_old) + x_old).udiv(two);
1153     if (x_old.ule(x_new))
1154       break;
1155     x_old = x_new;
1156   }
1157 
1158   // Make sure we return the closest approximation
1159   // NOTE: The rounding calculation below is correct. It will produce an
1160   // off-by-one discrepancy with results from pari/gp. That discrepancy has been
1161   // determined to be a rounding issue with pari/gp as it begins to use a
1162   // floating point representation after 192 bits. There are no discrepancies
1163   // between this algorithm and pari/gp for bit widths < 192 bits.
1164   APInt square(x_old * x_old);
1165   APInt nextSquare((x_old + 1) * (x_old +1));
1166   if (this->ult(square))
1167     return x_old;
1168   assert(this->ule(nextSquare) && "Error in APInt::sqrt computation");
1169   APInt midpoint((nextSquare - square).udiv(two));
1170   APInt offset(*this - square);
1171   if (offset.ult(midpoint))
1172     return x_old;
1173   return x_old + 1;
1174 }
1175 
1176 /// Computes the multiplicative inverse of this APInt for a given modulo. The
1177 /// iterative extended Euclidean algorithm is used to solve for this value,
1178 /// however we simplify it to speed up calculating only the inverse, and take
1179 /// advantage of div+rem calculations. We also use some tricks to avoid copying
1180 /// (potentially large) APInts around.
1181 /// WARNING: a value of '0' may be returned,
1182 ///          signifying that no multiplicative inverse exists!
1183 APInt APInt::multiplicativeInverse(const APInt& modulo) const {
1184   assert(ult(modulo) && "This APInt must be smaller than the modulo");
1185 
1186   // Using the properties listed at the following web page (accessed 06/21/08):
1187   //   http://www.numbertheory.org/php/euclid.html
1188   // (especially the properties numbered 3, 4 and 9) it can be proved that
1189   // BitWidth bits suffice for all the computations in the algorithm implemented
1190   // below. More precisely, this number of bits suffice if the multiplicative
1191   // inverse exists, but may not suffice for the general extended Euclidean
1192   // algorithm.
1193 
1194   APInt r[2] = { modulo, *this };
1195   APInt t[2] = { APInt(BitWidth, 0), APInt(BitWidth, 1) };
1196   APInt q(BitWidth, 0);
1197 
1198   unsigned i;
1199   for (i = 0; r[i^1] != 0; i ^= 1) {
1200     // An overview of the math without the confusing bit-flipping:
1201     // q = r[i-2] / r[i-1]
1202     // r[i] = r[i-2] % r[i-1]
1203     // t[i] = t[i-2] - t[i-1] * q
1204     udivrem(r[i], r[i^1], q, r[i]);
1205     t[i] -= t[i^1] * q;
1206   }
1207 
1208   // If this APInt and the modulo are not coprime, there is no multiplicative
1209   // inverse, so return 0. We check this by looking at the next-to-last
1210   // remainder, which is the gcd(*this,modulo) as calculated by the Euclidean
1211   // algorithm.
1212   if (r[i] != 1)
1213     return APInt(BitWidth, 0);
1214 
1215   // The next-to-last t is the multiplicative inverse.  However, we are
1216   // interested in a positive inverse. Calculate a positive one from a negative
1217   // one if necessary. A simple addition of the modulo suffices because
1218   // abs(t[i]) is known to be less than *this/2 (see the link above).
1219   if (t[i].isNegative())
1220     t[i] += modulo;
1221 
1222   return std::move(t[i]);
1223 }
1224 
1225 /// Calculate the magic numbers required to implement a signed integer division
1226 /// by a constant as a sequence of multiplies, adds and shifts.  Requires that
1227 /// the divisor not be 0, 1, or -1.  Taken from "Hacker's Delight", Henry S.
1228 /// Warren, Jr., chapter 10.
1229 APInt::ms APInt::magic() const {
1230   const APInt& d = *this;
1231   unsigned p;
1232   APInt ad, anc, delta, q1, r1, q2, r2, t;
1233   APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
1234   struct ms mag;
1235 
1236   ad = d.abs();
1237   t = signedMin + (d.lshr(d.getBitWidth() - 1));
1238   anc = t - 1 - t.urem(ad);   // absolute value of nc
1239   p = d.getBitWidth() - 1;    // initialize p
1240   q1 = signedMin.udiv(anc);   // initialize q1 = 2p/abs(nc)
1241   r1 = signedMin - q1*anc;    // initialize r1 = rem(2p,abs(nc))
1242   q2 = signedMin.udiv(ad);    // initialize q2 = 2p/abs(d)
1243   r2 = signedMin - q2*ad;     // initialize r2 = rem(2p,abs(d))
1244   do {
1245     p = p + 1;
1246     q1 = q1<<1;          // update q1 = 2p/abs(nc)
1247     r1 = r1<<1;          // update r1 = rem(2p/abs(nc))
1248     if (r1.uge(anc)) {  // must be unsigned comparison
1249       q1 = q1 + 1;
1250       r1 = r1 - anc;
1251     }
1252     q2 = q2<<1;          // update q2 = 2p/abs(d)
1253     r2 = r2<<1;          // update r2 = rem(2p/abs(d))
1254     if (r2.uge(ad)) {   // must be unsigned comparison
1255       q2 = q2 + 1;
1256       r2 = r2 - ad;
1257     }
1258     delta = ad - r2;
1259   } while (q1.ult(delta) || (q1 == delta && r1 == 0));
1260 
1261   mag.m = q2 + 1;
1262   if (d.isNegative()) mag.m = -mag.m;   // resulting magic number
1263   mag.s = p - d.getBitWidth();          // resulting shift
1264   return mag;
1265 }
1266 
1267 /// Calculate the magic numbers required to implement an unsigned integer
1268 /// division by a constant as a sequence of multiplies, adds and shifts.
1269 /// Requires that the divisor not be 0.  Taken from "Hacker's Delight", Henry
1270 /// S. Warren, Jr., chapter 10.
1271 /// LeadingZeros can be used to simplify the calculation if the upper bits
1272 /// of the divided value are known zero.
1273 APInt::mu APInt::magicu(unsigned LeadingZeros) const {
1274   const APInt& d = *this;
1275   unsigned p;
1276   APInt nc, delta, q1, r1, q2, r2;
1277   struct mu magu;
1278   magu.a = 0;               // initialize "add" indicator
1279   APInt allOnes = APInt::getAllOnesValue(d.getBitWidth()).lshr(LeadingZeros);
1280   APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
1281   APInt signedMax = APInt::getSignedMaxValue(d.getBitWidth());
1282 
1283   nc = allOnes - (allOnes - d).urem(d);
1284   p = d.getBitWidth() - 1;  // initialize p
1285   q1 = signedMin.udiv(nc);  // initialize q1 = 2p/nc
1286   r1 = signedMin - q1*nc;   // initialize r1 = rem(2p,nc)
1287   q2 = signedMax.udiv(d);   // initialize q2 = (2p-1)/d
1288   r2 = signedMax - q2*d;    // initialize r2 = rem((2p-1),d)
1289   do {
1290     p = p + 1;
1291     if (r1.uge(nc - r1)) {
1292       q1 = q1 + q1 + 1;  // update q1
1293       r1 = r1 + r1 - nc; // update r1
1294     }
1295     else {
1296       q1 = q1+q1; // update q1
1297       r1 = r1+r1; // update r1
1298     }
1299     if ((r2 + 1).uge(d - r2)) {
1300       if (q2.uge(signedMax)) magu.a = 1;
1301       q2 = q2+q2 + 1;     // update q2
1302       r2 = r2+r2 + 1 - d; // update r2
1303     }
1304     else {
1305       if (q2.uge(signedMin)) magu.a = 1;
1306       q2 = q2+q2;     // update q2
1307       r2 = r2+r2 + 1; // update r2
1308     }
1309     delta = d - 1 - r2;
1310   } while (p < d.getBitWidth()*2 &&
1311            (q1.ult(delta) || (q1 == delta && r1 == 0)));
1312   magu.m = q2 + 1; // resulting magic number
1313   magu.s = p - d.getBitWidth();  // resulting shift
1314   return magu;
1315 }
1316 
1317 /// Implementation of Knuth's Algorithm D (Division of nonnegative integers)
1318 /// from "Art of Computer Programming, Volume 2", section 4.3.1, p. 272. The
1319 /// variables here have the same names as in the algorithm. Comments explain
1320 /// the algorithm and any deviation from it.
1321 static void KnuthDiv(uint32_t *u, uint32_t *v, uint32_t *q, uint32_t* r,
1322                      unsigned m, unsigned n) {
1323   assert(u && "Must provide dividend");
1324   assert(v && "Must provide divisor");
1325   assert(q && "Must provide quotient");
1326   assert(u != v && u != q && v != q && "Must use different memory");
1327   assert(n>1 && "n must be > 1");
1328 
1329   // b denotes the base of the number system. In our case b is 2^32.
1330   const uint64_t b = uint64_t(1) << 32;
1331 
1332 // The DEBUG macros here tend to be spam in the debug output if you're not
1333 // debugging this code. Disable them unless KNUTH_DEBUG is defined.
1334 #ifdef KNUTH_DEBUG
1335 #define DEBUG_KNUTH(X) LLVM_DEBUG(X)
1336 #else
1337 #define DEBUG_KNUTH(X) do {} while(false)
1338 #endif
1339 
1340   DEBUG_KNUTH(dbgs() << "KnuthDiv: m=" << m << " n=" << n << '\n');
1341   DEBUG_KNUTH(dbgs() << "KnuthDiv: original:");
1342   DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1343   DEBUG_KNUTH(dbgs() << " by");
1344   DEBUG_KNUTH(for (int i = n; i > 0; i--) dbgs() << " " << v[i - 1]);
1345   DEBUG_KNUTH(dbgs() << '\n');
1346   // D1. [Normalize.] Set d = b / (v[n-1] + 1) and multiply all the digits of
1347   // u and v by d. Note that we have taken Knuth's advice here to use a power
1348   // of 2 value for d such that d * v[n-1] >= b/2 (b is the base). A power of
1349   // 2 allows us to shift instead of multiply and it is easy to determine the
1350   // shift amount from the leading zeros.  We are basically normalizing the u
1351   // and v so that its high bits are shifted to the top of v's range without
1352   // overflow. Note that this can require an extra word in u so that u must
1353   // be of length m+n+1.
1354   unsigned shift = countLeadingZeros(v[n-1]);
1355   uint32_t v_carry = 0;
1356   uint32_t u_carry = 0;
1357   if (shift) {
1358     for (unsigned i = 0; i < m+n; ++i) {
1359       uint32_t u_tmp = u[i] >> (32 - shift);
1360       u[i] = (u[i] << shift) | u_carry;
1361       u_carry = u_tmp;
1362     }
1363     for (unsigned i = 0; i < n; ++i) {
1364       uint32_t v_tmp = v[i] >> (32 - shift);
1365       v[i] = (v[i] << shift) | v_carry;
1366       v_carry = v_tmp;
1367     }
1368   }
1369   u[m+n] = u_carry;
1370 
1371   DEBUG_KNUTH(dbgs() << "KnuthDiv:   normal:");
1372   DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1373   DEBUG_KNUTH(dbgs() << " by");
1374   DEBUG_KNUTH(for (int i = n; i > 0; i--) dbgs() << " " << v[i - 1]);
1375   DEBUG_KNUTH(dbgs() << '\n');
1376 
1377   // D2. [Initialize j.]  Set j to m. This is the loop counter over the places.
1378   int j = m;
1379   do {
1380     DEBUG_KNUTH(dbgs() << "KnuthDiv: quotient digit #" << j << '\n');
1381     // D3. [Calculate q'.].
1382     //     Set qp = (u[j+n]*b + u[j+n-1]) / v[n-1]. (qp=qprime=q')
1383     //     Set rp = (u[j+n]*b + u[j+n-1]) % v[n-1]. (rp=rprime=r')
1384     // Now test if qp == b or qp*v[n-2] > b*rp + u[j+n-2]; if so, decrease
1385     // qp by 1, increase rp by v[n-1], and repeat this test if rp < b. The test
1386     // on v[n-2] determines at high speed most of the cases in which the trial
1387     // value qp is one too large, and it eliminates all cases where qp is two
1388     // too large.
1389     uint64_t dividend = Make_64(u[j+n], u[j+n-1]);
1390     DEBUG_KNUTH(dbgs() << "KnuthDiv: dividend == " << dividend << '\n');
1391     uint64_t qp = dividend / v[n-1];
1392     uint64_t rp = dividend % v[n-1];
1393     if (qp == b || qp*v[n-2] > b*rp + u[j+n-2]) {
1394       qp--;
1395       rp += v[n-1];
1396       if (rp < b && (qp == b || qp*v[n-2] > b*rp + u[j+n-2]))
1397         qp--;
1398     }
1399     DEBUG_KNUTH(dbgs() << "KnuthDiv: qp == " << qp << ", rp == " << rp << '\n');
1400 
1401     // D4. [Multiply and subtract.] Replace (u[j+n]u[j+n-1]...u[j]) with
1402     // (u[j+n]u[j+n-1]..u[j]) - qp * (v[n-1]...v[1]v[0]). This computation
1403     // consists of a simple multiplication by a one-place number, combined with
1404     // a subtraction.
1405     // The digits (u[j+n]...u[j]) should be kept positive; if the result of
1406     // this step is actually negative, (u[j+n]...u[j]) should be left as the
1407     // true value plus b**(n+1), namely as the b's complement of
1408     // the true value, and a "borrow" to the left should be remembered.
1409     int64_t borrow = 0;
1410     for (unsigned i = 0; i < n; ++i) {
1411       uint64_t p = uint64_t(qp) * uint64_t(v[i]);
1412       int64_t subres = int64_t(u[j+i]) - borrow - Lo_32(p);
1413       u[j+i] = Lo_32(subres);
1414       borrow = Hi_32(p) - Hi_32(subres);
1415       DEBUG_KNUTH(dbgs() << "KnuthDiv: u[j+i] = " << u[j + i]
1416                         << ", borrow = " << borrow << '\n');
1417     }
1418     bool isNeg = u[j+n] < borrow;
1419     u[j+n] -= Lo_32(borrow);
1420 
1421     DEBUG_KNUTH(dbgs() << "KnuthDiv: after subtraction:");
1422     DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1423     DEBUG_KNUTH(dbgs() << '\n');
1424 
1425     // D5. [Test remainder.] Set q[j] = qp. If the result of step D4 was
1426     // negative, go to step D6; otherwise go on to step D7.
1427     q[j] = Lo_32(qp);
1428     if (isNeg) {
1429       // D6. [Add back]. The probability that this step is necessary is very
1430       // small, on the order of only 2/b. Make sure that test data accounts for
1431       // this possibility. Decrease q[j] by 1
1432       q[j]--;
1433       // and add (0v[n-1]...v[1]v[0]) to (u[j+n]u[j+n-1]...u[j+1]u[j]).
1434       // A carry will occur to the left of u[j+n], and it should be ignored
1435       // since it cancels with the borrow that occurred in D4.
1436       bool carry = false;
1437       for (unsigned i = 0; i < n; i++) {
1438         uint32_t limit = std::min(u[j+i],v[i]);
1439         u[j+i] += v[i] + carry;
1440         carry = u[j+i] < limit || (carry && u[j+i] == limit);
1441       }
1442       u[j+n] += carry;
1443     }
1444     DEBUG_KNUTH(dbgs() << "KnuthDiv: after correction:");
1445     DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1446     DEBUG_KNUTH(dbgs() << "\nKnuthDiv: digit result = " << q[j] << '\n');
1447 
1448     // D7. [Loop on j.]  Decrease j by one. Now if j >= 0, go back to D3.
1449   } while (--j >= 0);
1450 
1451   DEBUG_KNUTH(dbgs() << "KnuthDiv: quotient:");
1452   DEBUG_KNUTH(for (int i = m; i >= 0; i--) dbgs() << " " << q[i]);
1453   DEBUG_KNUTH(dbgs() << '\n');
1454 
1455   // D8. [Unnormalize]. Now q[...] is the desired quotient, and the desired
1456   // remainder may be obtained by dividing u[...] by d. If r is non-null we
1457   // compute the remainder (urem uses this).
1458   if (r) {
1459     // The value d is expressed by the "shift" value above since we avoided
1460     // multiplication by d by using a shift left. So, all we have to do is
1461     // shift right here.
1462     if (shift) {
1463       uint32_t carry = 0;
1464       DEBUG_KNUTH(dbgs() << "KnuthDiv: remainder:");
1465       for (int i = n-1; i >= 0; i--) {
1466         r[i] = (u[i] >> shift) | carry;
1467         carry = u[i] << (32 - shift);
1468         DEBUG_KNUTH(dbgs() << " " << r[i]);
1469       }
1470     } else {
1471       for (int i = n-1; i >= 0; i--) {
1472         r[i] = u[i];
1473         DEBUG_KNUTH(dbgs() << " " << r[i]);
1474       }
1475     }
1476     DEBUG_KNUTH(dbgs() << '\n');
1477   }
1478   DEBUG_KNUTH(dbgs() << '\n');
1479 }
1480 
1481 void APInt::divide(const WordType *LHS, unsigned lhsWords, const WordType *RHS,
1482                    unsigned rhsWords, WordType *Quotient, WordType *Remainder) {
1483   assert(lhsWords >= rhsWords && "Fractional result");
1484 
1485   // First, compose the values into an array of 32-bit words instead of
1486   // 64-bit words. This is a necessity of both the "short division" algorithm
1487   // and the Knuth "classical algorithm" which requires there to be native
1488   // operations for +, -, and * on an m bit value with an m*2 bit result. We
1489   // can't use 64-bit operands here because we don't have native results of
1490   // 128-bits. Furthermore, casting the 64-bit values to 32-bit values won't
1491   // work on large-endian machines.
1492   unsigned n = rhsWords * 2;
1493   unsigned m = (lhsWords * 2) - n;
1494 
1495   // Allocate space for the temporary values we need either on the stack, if
1496   // it will fit, or on the heap if it won't.
1497   uint32_t SPACE[128];
1498   uint32_t *U = nullptr;
1499   uint32_t *V = nullptr;
1500   uint32_t *Q = nullptr;
1501   uint32_t *R = nullptr;
1502   if ((Remainder?4:3)*n+2*m+1 <= 128) {
1503     U = &SPACE[0];
1504     V = &SPACE[m+n+1];
1505     Q = &SPACE[(m+n+1) + n];
1506     if (Remainder)
1507       R = &SPACE[(m+n+1) + n + (m+n)];
1508   } else {
1509     U = new uint32_t[m + n + 1];
1510     V = new uint32_t[n];
1511     Q = new uint32_t[m+n];
1512     if (Remainder)
1513       R = new uint32_t[n];
1514   }
1515 
1516   // Initialize the dividend
1517   memset(U, 0, (m+n+1)*sizeof(uint32_t));
1518   for (unsigned i = 0; i < lhsWords; ++i) {
1519     uint64_t tmp = LHS[i];
1520     U[i * 2] = Lo_32(tmp);
1521     U[i * 2 + 1] = Hi_32(tmp);
1522   }
1523   U[m+n] = 0; // this extra word is for "spill" in the Knuth algorithm.
1524 
1525   // Initialize the divisor
1526   memset(V, 0, (n)*sizeof(uint32_t));
1527   for (unsigned i = 0; i < rhsWords; ++i) {
1528     uint64_t tmp = RHS[i];
1529     V[i * 2] = Lo_32(tmp);
1530     V[i * 2 + 1] = Hi_32(tmp);
1531   }
1532 
1533   // initialize the quotient and remainder
1534   memset(Q, 0, (m+n) * sizeof(uint32_t));
1535   if (Remainder)
1536     memset(R, 0, n * sizeof(uint32_t));
1537 
1538   // Now, adjust m and n for the Knuth division. n is the number of words in
1539   // the divisor. m is the number of words by which the dividend exceeds the
1540   // divisor (i.e. m+n is the length of the dividend). These sizes must not
1541   // contain any zero words or the Knuth algorithm fails.
1542   for (unsigned i = n; i > 0 && V[i-1] == 0; i--) {
1543     n--;
1544     m++;
1545   }
1546   for (unsigned i = m+n; i > 0 && U[i-1] == 0; i--)
1547     m--;
1548 
1549   // If we're left with only a single word for the divisor, Knuth doesn't work
1550   // so we implement the short division algorithm here. This is much simpler
1551   // and faster because we are certain that we can divide a 64-bit quantity
1552   // by a 32-bit quantity at hardware speed and short division is simply a
1553   // series of such operations. This is just like doing short division but we
1554   // are using base 2^32 instead of base 10.
1555   assert(n != 0 && "Divide by zero?");
1556   if (n == 1) {
1557     uint32_t divisor = V[0];
1558     uint32_t remainder = 0;
1559     for (int i = m; i >= 0; i--) {
1560       uint64_t partial_dividend = Make_64(remainder, U[i]);
1561       if (partial_dividend == 0) {
1562         Q[i] = 0;
1563         remainder = 0;
1564       } else if (partial_dividend < divisor) {
1565         Q[i] = 0;
1566         remainder = Lo_32(partial_dividend);
1567       } else if (partial_dividend == divisor) {
1568         Q[i] = 1;
1569         remainder = 0;
1570       } else {
1571         Q[i] = Lo_32(partial_dividend / divisor);
1572         remainder = Lo_32(partial_dividend - (Q[i] * divisor));
1573       }
1574     }
1575     if (R)
1576       R[0] = remainder;
1577   } else {
1578     // Now we're ready to invoke the Knuth classical divide algorithm. In this
1579     // case n > 1.
1580     KnuthDiv(U, V, Q, R, m, n);
1581   }
1582 
1583   // If the caller wants the quotient
1584   if (Quotient) {
1585     for (unsigned i = 0; i < lhsWords; ++i)
1586       Quotient[i] = Make_64(Q[i*2+1], Q[i*2]);
1587   }
1588 
1589   // If the caller wants the remainder
1590   if (Remainder) {
1591     for (unsigned i = 0; i < rhsWords; ++i)
1592       Remainder[i] = Make_64(R[i*2+1], R[i*2]);
1593   }
1594 
1595   // Clean up the memory we allocated.
1596   if (U != &SPACE[0]) {
1597     delete [] U;
1598     delete [] V;
1599     delete [] Q;
1600     delete [] R;
1601   }
1602 }
1603 
1604 APInt APInt::udiv(const APInt &RHS) const {
1605   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
1606 
1607   // First, deal with the easy case
1608   if (isSingleWord()) {
1609     assert(RHS.U.VAL != 0 && "Divide by zero?");
1610     return APInt(BitWidth, U.VAL / RHS.U.VAL);
1611   }
1612 
1613   // Get some facts about the LHS and RHS number of bits and words
1614   unsigned lhsWords = getNumWords(getActiveBits());
1615   unsigned rhsBits  = RHS.getActiveBits();
1616   unsigned rhsWords = getNumWords(rhsBits);
1617   assert(rhsWords && "Divided by zero???");
1618 
1619   // Deal with some degenerate cases
1620   if (!lhsWords)
1621     // 0 / X ===> 0
1622     return APInt(BitWidth, 0);
1623   if (rhsBits == 1)
1624     // X / 1 ===> X
1625     return *this;
1626   if (lhsWords < rhsWords || this->ult(RHS))
1627     // X / Y ===> 0, iff X < Y
1628     return APInt(BitWidth, 0);
1629   if (*this == RHS)
1630     // X / X ===> 1
1631     return APInt(BitWidth, 1);
1632   if (lhsWords == 1) // rhsWords is 1 if lhsWords is 1.
1633     // All high words are zero, just use native divide
1634     return APInt(BitWidth, this->U.pVal[0] / RHS.U.pVal[0]);
1635 
1636   // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1637   APInt Quotient(BitWidth, 0); // to hold result.
1638   divide(U.pVal, lhsWords, RHS.U.pVal, rhsWords, Quotient.U.pVal, nullptr);
1639   return Quotient;
1640 }
1641 
1642 APInt APInt::udiv(uint64_t RHS) const {
1643   assert(RHS != 0 && "Divide by zero?");
1644 
1645   // First, deal with the easy case
1646   if (isSingleWord())
1647     return APInt(BitWidth, U.VAL / RHS);
1648 
1649   // Get some facts about the LHS words.
1650   unsigned lhsWords = getNumWords(getActiveBits());
1651 
1652   // Deal with some degenerate cases
1653   if (!lhsWords)
1654     // 0 / X ===> 0
1655     return APInt(BitWidth, 0);
1656   if (RHS == 1)
1657     // X / 1 ===> X
1658     return *this;
1659   if (this->ult(RHS))
1660     // X / Y ===> 0, iff X < Y
1661     return APInt(BitWidth, 0);
1662   if (*this == RHS)
1663     // X / X ===> 1
1664     return APInt(BitWidth, 1);
1665   if (lhsWords == 1) // rhsWords is 1 if lhsWords is 1.
1666     // All high words are zero, just use native divide
1667     return APInt(BitWidth, this->U.pVal[0] / RHS);
1668 
1669   // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1670   APInt Quotient(BitWidth, 0); // to hold result.
1671   divide(U.pVal, lhsWords, &RHS, 1, Quotient.U.pVal, nullptr);
1672   return Quotient;
1673 }
1674 
1675 APInt APInt::sdiv(const APInt &RHS) const {
1676   if (isNegative()) {
1677     if (RHS.isNegative())
1678       return (-(*this)).udiv(-RHS);
1679     return -((-(*this)).udiv(RHS));
1680   }
1681   if (RHS.isNegative())
1682     return -(this->udiv(-RHS));
1683   return this->udiv(RHS);
1684 }
1685 
1686 APInt APInt::sdiv(int64_t RHS) const {
1687   if (isNegative()) {
1688     if (RHS < 0)
1689       return (-(*this)).udiv(-RHS);
1690     return -((-(*this)).udiv(RHS));
1691   }
1692   if (RHS < 0)
1693     return -(this->udiv(-RHS));
1694   return this->udiv(RHS);
1695 }
1696 
1697 APInt APInt::urem(const APInt &RHS) const {
1698   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
1699   if (isSingleWord()) {
1700     assert(RHS.U.VAL != 0 && "Remainder by zero?");
1701     return APInt(BitWidth, U.VAL % RHS.U.VAL);
1702   }
1703 
1704   // Get some facts about the LHS
1705   unsigned lhsWords = getNumWords(getActiveBits());
1706 
1707   // Get some facts about the RHS
1708   unsigned rhsBits = RHS.getActiveBits();
1709   unsigned rhsWords = getNumWords(rhsBits);
1710   assert(rhsWords && "Performing remainder operation by zero ???");
1711 
1712   // Check the degenerate cases
1713   if (lhsWords == 0)
1714     // 0 % Y ===> 0
1715     return APInt(BitWidth, 0);
1716   if (rhsBits == 1)
1717     // X % 1 ===> 0
1718     return APInt(BitWidth, 0);
1719   if (lhsWords < rhsWords || this->ult(RHS))
1720     // X % Y ===> X, iff X < Y
1721     return *this;
1722   if (*this == RHS)
1723     // X % X == 0;
1724     return APInt(BitWidth, 0);
1725   if (lhsWords == 1)
1726     // All high words are zero, just use native remainder
1727     return APInt(BitWidth, U.pVal[0] % RHS.U.pVal[0]);
1728 
1729   // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1730   APInt Remainder(BitWidth, 0);
1731   divide(U.pVal, lhsWords, RHS.U.pVal, rhsWords, nullptr, Remainder.U.pVal);
1732   return Remainder;
1733 }
1734 
1735 uint64_t APInt::urem(uint64_t RHS) const {
1736   assert(RHS != 0 && "Remainder by zero?");
1737 
1738   if (isSingleWord())
1739     return U.VAL % RHS;
1740 
1741   // Get some facts about the LHS
1742   unsigned lhsWords = getNumWords(getActiveBits());
1743 
1744   // Check the degenerate cases
1745   if (lhsWords == 0)
1746     // 0 % Y ===> 0
1747     return 0;
1748   if (RHS == 1)
1749     // X % 1 ===> 0
1750     return 0;
1751   if (this->ult(RHS))
1752     // X % Y ===> X, iff X < Y
1753     return getZExtValue();
1754   if (*this == RHS)
1755     // X % X == 0;
1756     return 0;
1757   if (lhsWords == 1)
1758     // All high words are zero, just use native remainder
1759     return U.pVal[0] % RHS;
1760 
1761   // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1762   uint64_t Remainder;
1763   divide(U.pVal, lhsWords, &RHS, 1, nullptr, &Remainder);
1764   return Remainder;
1765 }
1766 
1767 APInt APInt::srem(const APInt &RHS) const {
1768   if (isNegative()) {
1769     if (RHS.isNegative())
1770       return -((-(*this)).urem(-RHS));
1771     return -((-(*this)).urem(RHS));
1772   }
1773   if (RHS.isNegative())
1774     return this->urem(-RHS);
1775   return this->urem(RHS);
1776 }
1777 
1778 int64_t APInt::srem(int64_t RHS) const {
1779   if (isNegative()) {
1780     if (RHS < 0)
1781       return -((-(*this)).urem(-RHS));
1782     return -((-(*this)).urem(RHS));
1783   }
1784   if (RHS < 0)
1785     return this->urem(-RHS);
1786   return this->urem(RHS);
1787 }
1788 
1789 void APInt::udivrem(const APInt &LHS, const APInt &RHS,
1790                     APInt &Quotient, APInt &Remainder) {
1791   assert(LHS.BitWidth == RHS.BitWidth && "Bit widths must be the same");
1792   unsigned BitWidth = LHS.BitWidth;
1793 
1794   // First, deal with the easy case
1795   if (LHS.isSingleWord()) {
1796     assert(RHS.U.VAL != 0 && "Divide by zero?");
1797     uint64_t QuotVal = LHS.U.VAL / RHS.U.VAL;
1798     uint64_t RemVal = LHS.U.VAL % RHS.U.VAL;
1799     Quotient = APInt(BitWidth, QuotVal);
1800     Remainder = APInt(BitWidth, RemVal);
1801     return;
1802   }
1803 
1804   // Get some size facts about the dividend and divisor
1805   unsigned lhsWords = getNumWords(LHS.getActiveBits());
1806   unsigned rhsBits  = RHS.getActiveBits();
1807   unsigned rhsWords = getNumWords(rhsBits);
1808   assert(rhsWords && "Performing divrem operation by zero ???");
1809 
1810   // Check the degenerate cases
1811   if (lhsWords == 0) {
1812     Quotient = APInt(BitWidth, 0);    // 0 / Y ===> 0
1813     Remainder = APInt(BitWidth, 0);   // 0 % Y ===> 0
1814     return;
1815   }
1816 
1817   if (rhsBits == 1) {
1818     Quotient = LHS;                   // X / 1 ===> X
1819     Remainder = APInt(BitWidth, 0);   // X % 1 ===> 0
1820   }
1821 
1822   if (lhsWords < rhsWords || LHS.ult(RHS)) {
1823     Remainder = LHS;                  // X % Y ===> X, iff X < Y
1824     Quotient = APInt(BitWidth, 0);    // X / Y ===> 0, iff X < Y
1825     return;
1826   }
1827 
1828   if (LHS == RHS) {
1829     Quotient  = APInt(BitWidth, 1);   // X / X ===> 1
1830     Remainder = APInt(BitWidth, 0);   // X % X ===> 0;
1831     return;
1832   }
1833 
1834   // Make sure there is enough space to hold the results.
1835   // NOTE: This assumes that reallocate won't affect any bits if it doesn't
1836   // change the size. This is necessary if Quotient or Remainder is aliased
1837   // with LHS or RHS.
1838   Quotient.reallocate(BitWidth);
1839   Remainder.reallocate(BitWidth);
1840 
1841   if (lhsWords == 1) { // rhsWords is 1 if lhsWords is 1.
1842     // There is only one word to consider so use the native versions.
1843     uint64_t lhsValue = LHS.U.pVal[0];
1844     uint64_t rhsValue = RHS.U.pVal[0];
1845     Quotient = lhsValue / rhsValue;
1846     Remainder = lhsValue % rhsValue;
1847     return;
1848   }
1849 
1850   // Okay, lets do it the long way
1851   divide(LHS.U.pVal, lhsWords, RHS.U.pVal, rhsWords, Quotient.U.pVal,
1852          Remainder.U.pVal);
1853   // Clear the rest of the Quotient and Remainder.
1854   std::memset(Quotient.U.pVal + lhsWords, 0,
1855               (getNumWords(BitWidth) - lhsWords) * APINT_WORD_SIZE);
1856   std::memset(Remainder.U.pVal + rhsWords, 0,
1857               (getNumWords(BitWidth) - rhsWords) * APINT_WORD_SIZE);
1858 }
1859 
1860 void APInt::udivrem(const APInt &LHS, uint64_t RHS, APInt &Quotient,
1861                     uint64_t &Remainder) {
1862   assert(RHS != 0 && "Divide by zero?");
1863   unsigned BitWidth = LHS.BitWidth;
1864 
1865   // First, deal with the easy case
1866   if (LHS.isSingleWord()) {
1867     uint64_t QuotVal = LHS.U.VAL / RHS;
1868     Remainder = LHS.U.VAL % RHS;
1869     Quotient = APInt(BitWidth, QuotVal);
1870     return;
1871   }
1872 
1873   // Get some size facts about the dividend and divisor
1874   unsigned lhsWords = getNumWords(LHS.getActiveBits());
1875 
1876   // Check the degenerate cases
1877   if (lhsWords == 0) {
1878     Quotient = APInt(BitWidth, 0);    // 0 / Y ===> 0
1879     Remainder = 0;                    // 0 % Y ===> 0
1880     return;
1881   }
1882 
1883   if (RHS == 1) {
1884     Quotient = LHS;                   // X / 1 ===> X
1885     Remainder = 0;                    // X % 1 ===> 0
1886     return;
1887   }
1888 
1889   if (LHS.ult(RHS)) {
1890     Remainder = LHS.getZExtValue();   // X % Y ===> X, iff X < Y
1891     Quotient = APInt(BitWidth, 0);    // X / Y ===> 0, iff X < Y
1892     return;
1893   }
1894 
1895   if (LHS == RHS) {
1896     Quotient  = APInt(BitWidth, 1);   // X / X ===> 1
1897     Remainder = 0;                    // X % X ===> 0;
1898     return;
1899   }
1900 
1901   // Make sure there is enough space to hold the results.
1902   // NOTE: This assumes that reallocate won't affect any bits if it doesn't
1903   // change the size. This is necessary if Quotient is aliased with LHS.
1904   Quotient.reallocate(BitWidth);
1905 
1906   if (lhsWords == 1) { // rhsWords is 1 if lhsWords is 1.
1907     // There is only one word to consider so use the native versions.
1908     uint64_t lhsValue = LHS.U.pVal[0];
1909     Quotient = lhsValue / RHS;
1910     Remainder = lhsValue % RHS;
1911     return;
1912   }
1913 
1914   // Okay, lets do it the long way
1915   divide(LHS.U.pVal, lhsWords, &RHS, 1, Quotient.U.pVal, &Remainder);
1916   // Clear the rest of the Quotient.
1917   std::memset(Quotient.U.pVal + lhsWords, 0,
1918               (getNumWords(BitWidth) - lhsWords) * APINT_WORD_SIZE);
1919 }
1920 
1921 void APInt::sdivrem(const APInt &LHS, const APInt &RHS,
1922                     APInt &Quotient, APInt &Remainder) {
1923   if (LHS.isNegative()) {
1924     if (RHS.isNegative())
1925       APInt::udivrem(-LHS, -RHS, Quotient, Remainder);
1926     else {
1927       APInt::udivrem(-LHS, RHS, Quotient, Remainder);
1928       Quotient.negate();
1929     }
1930     Remainder.negate();
1931   } else if (RHS.isNegative()) {
1932     APInt::udivrem(LHS, -RHS, Quotient, Remainder);
1933     Quotient.negate();
1934   } else {
1935     APInt::udivrem(LHS, RHS, Quotient, Remainder);
1936   }
1937 }
1938 
1939 void APInt::sdivrem(const APInt &LHS, int64_t RHS,
1940                     APInt &Quotient, int64_t &Remainder) {
1941   uint64_t R = Remainder;
1942   if (LHS.isNegative()) {
1943     if (RHS < 0)
1944       APInt::udivrem(-LHS, -RHS, Quotient, R);
1945     else {
1946       APInt::udivrem(-LHS, RHS, Quotient, R);
1947       Quotient.negate();
1948     }
1949     R = -R;
1950   } else if (RHS < 0) {
1951     APInt::udivrem(LHS, -RHS, Quotient, R);
1952     Quotient.negate();
1953   } else {
1954     APInt::udivrem(LHS, RHS, Quotient, R);
1955   }
1956   Remainder = R;
1957 }
1958 
1959 APInt APInt::sadd_ov(const APInt &RHS, bool &Overflow) const {
1960   APInt Res = *this+RHS;
1961   Overflow = isNonNegative() == RHS.isNonNegative() &&
1962              Res.isNonNegative() != isNonNegative();
1963   return Res;
1964 }
1965 
1966 APInt APInt::uadd_ov(const APInt &RHS, bool &Overflow) const {
1967   APInt Res = *this+RHS;
1968   Overflow = Res.ult(RHS);
1969   return Res;
1970 }
1971 
1972 APInt APInt::ssub_ov(const APInt &RHS, bool &Overflow) const {
1973   APInt Res = *this - RHS;
1974   Overflow = isNonNegative() != RHS.isNonNegative() &&
1975              Res.isNonNegative() != isNonNegative();
1976   return Res;
1977 }
1978 
1979 APInt APInt::usub_ov(const APInt &RHS, bool &Overflow) const {
1980   APInt Res = *this-RHS;
1981   Overflow = Res.ugt(*this);
1982   return Res;
1983 }
1984 
1985 APInt APInt::sdiv_ov(const APInt &RHS, bool &Overflow) const {
1986   // MININT/-1  -->  overflow.
1987   Overflow = isMinSignedValue() && RHS.isAllOnesValue();
1988   return sdiv(RHS);
1989 }
1990 
1991 APInt APInt::smul_ov(const APInt &RHS, bool &Overflow) const {
1992   APInt Res = *this * RHS;
1993 
1994   if (*this != 0 && RHS != 0)
1995     Overflow = Res.sdiv(RHS) != *this || Res.sdiv(*this) != RHS;
1996   else
1997     Overflow = false;
1998   return Res;
1999 }
2000 
2001 APInt APInt::umul_ov(const APInt &RHS, bool &Overflow) const {
2002   if (countLeadingZeros() + RHS.countLeadingZeros() + 2 <= BitWidth) {
2003     Overflow = true;
2004     return *this * RHS;
2005   }
2006 
2007   APInt Res = lshr(1) * RHS;
2008   Overflow = Res.isNegative();
2009   Res <<= 1;
2010   if ((*this)[0]) {
2011     Res += RHS;
2012     if (Res.ult(RHS))
2013       Overflow = true;
2014   }
2015   return Res;
2016 }
2017 
2018 APInt APInt::sshl_ov(const APInt &ShAmt, bool &Overflow) const {
2019   Overflow = ShAmt.uge(getBitWidth());
2020   if (Overflow)
2021     return APInt(BitWidth, 0);
2022 
2023   if (isNonNegative()) // Don't allow sign change.
2024     Overflow = ShAmt.uge(countLeadingZeros());
2025   else
2026     Overflow = ShAmt.uge(countLeadingOnes());
2027 
2028   return *this << ShAmt;
2029 }
2030 
2031 APInt APInt::ushl_ov(const APInt &ShAmt, bool &Overflow) const {
2032   Overflow = ShAmt.uge(getBitWidth());
2033   if (Overflow)
2034     return APInt(BitWidth, 0);
2035 
2036   Overflow = ShAmt.ugt(countLeadingZeros());
2037 
2038   return *this << ShAmt;
2039 }
2040 
2041 APInt APInt::sadd_sat(const APInt &RHS) const {
2042   bool Overflow;
2043   APInt Res = sadd_ov(RHS, Overflow);
2044   if (!Overflow)
2045     return Res;
2046 
2047   return isNegative() ? APInt::getSignedMinValue(BitWidth)
2048                       : APInt::getSignedMaxValue(BitWidth);
2049 }
2050 
2051 APInt APInt::uadd_sat(const APInt &RHS) const {
2052   bool Overflow;
2053   APInt Res = uadd_ov(RHS, Overflow);
2054   if (!Overflow)
2055     return Res;
2056 
2057   return APInt::getMaxValue(BitWidth);
2058 }
2059 
2060 APInt APInt::ssub_sat(const APInt &RHS) const {
2061   bool Overflow;
2062   APInt Res = ssub_ov(RHS, Overflow);
2063   if (!Overflow)
2064     return Res;
2065 
2066   return isNegative() ? APInt::getSignedMinValue(BitWidth)
2067                       : APInt::getSignedMaxValue(BitWidth);
2068 }
2069 
2070 APInt APInt::usub_sat(const APInt &RHS) const {
2071   bool Overflow;
2072   APInt Res = usub_ov(RHS, Overflow);
2073   if (!Overflow)
2074     return Res;
2075 
2076   return APInt(BitWidth, 0);
2077 }
2078 
2079 APInt APInt::smul_sat(const APInt &RHS) const {
2080   bool Overflow;
2081   APInt Res = smul_ov(RHS, Overflow);
2082   if (!Overflow)
2083     return Res;
2084 
2085   // The result is negative if one and only one of inputs is negative.
2086   bool ResIsNegative = isNegative() ^ RHS.isNegative();
2087 
2088   return ResIsNegative ? APInt::getSignedMinValue(BitWidth)
2089                        : APInt::getSignedMaxValue(BitWidth);
2090 }
2091 
2092 APInt APInt::umul_sat(const APInt &RHS) const {
2093   bool Overflow;
2094   APInt Res = umul_ov(RHS, Overflow);
2095   if (!Overflow)
2096     return Res;
2097 
2098   return APInt::getMaxValue(BitWidth);
2099 }
2100 
2101 APInt APInt::sshl_sat(const APInt &RHS) const {
2102   bool Overflow;
2103   APInt Res = sshl_ov(RHS, Overflow);
2104   if (!Overflow)
2105     return Res;
2106 
2107   return isNegative() ? APInt::getSignedMinValue(BitWidth)
2108                       : APInt::getSignedMaxValue(BitWidth);
2109 }
2110 
2111 APInt APInt::ushl_sat(const APInt &RHS) const {
2112   bool Overflow;
2113   APInt Res = ushl_ov(RHS, Overflow);
2114   if (!Overflow)
2115     return Res;
2116 
2117   return APInt::getMaxValue(BitWidth);
2118 }
2119 
2120 void APInt::fromString(unsigned numbits, StringRef str, uint8_t radix) {
2121   // Check our assumptions here
2122   assert(!str.empty() && "Invalid string length");
2123   assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 ||
2124           radix == 36) &&
2125          "Radix should be 2, 8, 10, 16, or 36!");
2126 
2127   StringRef::iterator p = str.begin();
2128   size_t slen = str.size();
2129   bool isNeg = *p == '-';
2130   if (*p == '-' || *p == '+') {
2131     p++;
2132     slen--;
2133     assert(slen && "String is only a sign, needs a value.");
2134   }
2135   assert((slen <= numbits || radix != 2) && "Insufficient bit width");
2136   assert(((slen-1)*3 <= numbits || radix != 8) && "Insufficient bit width");
2137   assert(((slen-1)*4 <= numbits || radix != 16) && "Insufficient bit width");
2138   assert((((slen-1)*64)/22 <= numbits || radix != 10) &&
2139          "Insufficient bit width");
2140 
2141   // Allocate memory if needed
2142   if (isSingleWord())
2143     U.VAL = 0;
2144   else
2145     U.pVal = getClearedMemory(getNumWords());
2146 
2147   // Figure out if we can shift instead of multiply
2148   unsigned shift = (radix == 16 ? 4 : radix == 8 ? 3 : radix == 2 ? 1 : 0);
2149 
2150   // Enter digit traversal loop
2151   for (StringRef::iterator e = str.end(); p != e; ++p) {
2152     unsigned digit = getDigit(*p, radix);
2153     assert(digit < radix && "Invalid character in digit string");
2154 
2155     // Shift or multiply the value by the radix
2156     if (slen > 1) {
2157       if (shift)
2158         *this <<= shift;
2159       else
2160         *this *= radix;
2161     }
2162 
2163     // Add in the digit we just interpreted
2164     *this += digit;
2165   }
2166   // If its negative, put it in two's complement form
2167   if (isNeg)
2168     this->negate();
2169 }
2170 
2171 void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix,
2172                      bool Signed, bool formatAsCLiteral) const {
2173   assert((Radix == 10 || Radix == 8 || Radix == 16 || Radix == 2 ||
2174           Radix == 36) &&
2175          "Radix should be 2, 8, 10, 16, or 36!");
2176 
2177   const char *Prefix = "";
2178   if (formatAsCLiteral) {
2179     switch (Radix) {
2180       case 2:
2181         // Binary literals are a non-standard extension added in gcc 4.3:
2182         // http://gcc.gnu.org/onlinedocs/gcc-4.3.0/gcc/Binary-constants.html
2183         Prefix = "0b";
2184         break;
2185       case 8:
2186         Prefix = "0";
2187         break;
2188       case 10:
2189         break; // No prefix
2190       case 16:
2191         Prefix = "0x";
2192         break;
2193       default:
2194         llvm_unreachable("Invalid radix!");
2195     }
2196   }
2197 
2198   // First, check for a zero value and just short circuit the logic below.
2199   if (*this == 0) {
2200     while (*Prefix) {
2201       Str.push_back(*Prefix);
2202       ++Prefix;
2203     };
2204     Str.push_back('0');
2205     return;
2206   }
2207 
2208   static const char Digits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
2209 
2210   if (isSingleWord()) {
2211     char Buffer[65];
2212     char *BufPtr = std::end(Buffer);
2213 
2214     uint64_t N;
2215     if (!Signed) {
2216       N = getZExtValue();
2217     } else {
2218       int64_t I = getSExtValue();
2219       if (I >= 0) {
2220         N = I;
2221       } else {
2222         Str.push_back('-');
2223         N = -(uint64_t)I;
2224       }
2225     }
2226 
2227     while (*Prefix) {
2228       Str.push_back(*Prefix);
2229       ++Prefix;
2230     };
2231 
2232     while (N) {
2233       *--BufPtr = Digits[N % Radix];
2234       N /= Radix;
2235     }
2236     Str.append(BufPtr, std::end(Buffer));
2237     return;
2238   }
2239 
2240   APInt Tmp(*this);
2241 
2242   if (Signed && isNegative()) {
2243     // They want to print the signed version and it is a negative value
2244     // Flip the bits and add one to turn it into the equivalent positive
2245     // value and put a '-' in the result.
2246     Tmp.negate();
2247     Str.push_back('-');
2248   }
2249 
2250   while (*Prefix) {
2251     Str.push_back(*Prefix);
2252     ++Prefix;
2253   };
2254 
2255   // We insert the digits backward, then reverse them to get the right order.
2256   unsigned StartDig = Str.size();
2257 
2258   // For the 2, 8 and 16 bit cases, we can just shift instead of divide
2259   // because the number of bits per digit (1, 3 and 4 respectively) divides
2260   // equally.  We just shift until the value is zero.
2261   if (Radix == 2 || Radix == 8 || Radix == 16) {
2262     // Just shift tmp right for each digit width until it becomes zero
2263     unsigned ShiftAmt = (Radix == 16 ? 4 : (Radix == 8 ? 3 : 1));
2264     unsigned MaskAmt = Radix - 1;
2265 
2266     while (Tmp.getBoolValue()) {
2267       unsigned Digit = unsigned(Tmp.getRawData()[0]) & MaskAmt;
2268       Str.push_back(Digits[Digit]);
2269       Tmp.lshrInPlace(ShiftAmt);
2270     }
2271   } else {
2272     while (Tmp.getBoolValue()) {
2273       uint64_t Digit;
2274       udivrem(Tmp, Radix, Tmp, Digit);
2275       assert(Digit < Radix && "divide failed");
2276       Str.push_back(Digits[Digit]);
2277     }
2278   }
2279 
2280   // Reverse the digits before returning.
2281   std::reverse(Str.begin()+StartDig, Str.end());
2282 }
2283 
2284 /// Returns the APInt as a std::string. Note that this is an inefficient method.
2285 /// It is better to pass in a SmallVector/SmallString to the methods above.
2286 std::string APInt::toString(unsigned Radix = 10, bool Signed = true) const {
2287   SmallString<40> S;
2288   toString(S, Radix, Signed, /* formatAsCLiteral = */false);
2289   return std::string(S.str());
2290 }
2291 
2292 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2293 LLVM_DUMP_METHOD void APInt::dump() const {
2294   SmallString<40> S, U;
2295   this->toStringUnsigned(U);
2296   this->toStringSigned(S);
2297   dbgs() << "APInt(" << BitWidth << "b, "
2298          << U << "u " << S << "s)\n";
2299 }
2300 #endif
2301 
2302 void APInt::print(raw_ostream &OS, bool isSigned) const {
2303   SmallString<40> S;
2304   this->toString(S, 10, isSigned, /* formatAsCLiteral = */false);
2305   OS << S;
2306 }
2307 
2308 // This implements a variety of operations on a representation of
2309 // arbitrary precision, two's-complement, bignum integer values.
2310 
2311 // Assumed by lowHalf, highHalf, partMSB and partLSB.  A fairly safe
2312 // and unrestricting assumption.
2313 static_assert(APInt::APINT_BITS_PER_WORD % 2 == 0,
2314               "Part width must be divisible by 2!");
2315 
2316 /* Some handy functions local to this file.  */
2317 
2318 /* Returns the integer part with the least significant BITS set.
2319    BITS cannot be zero.  */
2320 static inline APInt::WordType lowBitMask(unsigned bits) {
2321   assert(bits != 0 && bits <= APInt::APINT_BITS_PER_WORD);
2322 
2323   return ~(APInt::WordType) 0 >> (APInt::APINT_BITS_PER_WORD - bits);
2324 }
2325 
2326 /* Returns the value of the lower half of PART.  */
2327 static inline APInt::WordType lowHalf(APInt::WordType part) {
2328   return part & lowBitMask(APInt::APINT_BITS_PER_WORD / 2);
2329 }
2330 
2331 /* Returns the value of the upper half of PART.  */
2332 static inline APInt::WordType highHalf(APInt::WordType part) {
2333   return part >> (APInt::APINT_BITS_PER_WORD / 2);
2334 }
2335 
2336 /* Returns the bit number of the most significant set bit of a part.
2337    If the input number has no bits set -1U is returned.  */
2338 static unsigned partMSB(APInt::WordType value) {
2339   return findLastSet(value, ZB_Max);
2340 }
2341 
2342 /* Returns the bit number of the least significant set bit of a
2343    part.  If the input number has no bits set -1U is returned.  */
2344 static unsigned partLSB(APInt::WordType value) {
2345   return findFirstSet(value, ZB_Max);
2346 }
2347 
2348 /* Sets the least significant part of a bignum to the input value, and
2349    zeroes out higher parts.  */
2350 void APInt::tcSet(WordType *dst, WordType part, unsigned parts) {
2351   assert(parts > 0);
2352 
2353   dst[0] = part;
2354   for (unsigned i = 1; i < parts; i++)
2355     dst[i] = 0;
2356 }
2357 
2358 /* Assign one bignum to another.  */
2359 void APInt::tcAssign(WordType *dst, const WordType *src, unsigned parts) {
2360   for (unsigned i = 0; i < parts; i++)
2361     dst[i] = src[i];
2362 }
2363 
2364 /* Returns true if a bignum is zero, false otherwise.  */
2365 bool APInt::tcIsZero(const WordType *src, unsigned parts) {
2366   for (unsigned i = 0; i < parts; i++)
2367     if (src[i])
2368       return false;
2369 
2370   return true;
2371 }
2372 
2373 /* Extract the given bit of a bignum; returns 0 or 1.  */
2374 int APInt::tcExtractBit(const WordType *parts, unsigned bit) {
2375   return (parts[whichWord(bit)] & maskBit(bit)) != 0;
2376 }
2377 
2378 /* Set the given bit of a bignum. */
2379 void APInt::tcSetBit(WordType *parts, unsigned bit) {
2380   parts[whichWord(bit)] |= maskBit(bit);
2381 }
2382 
2383 /* Clears the given bit of a bignum. */
2384 void APInt::tcClearBit(WordType *parts, unsigned bit) {
2385   parts[whichWord(bit)] &= ~maskBit(bit);
2386 }
2387 
2388 /* Returns the bit number of the least significant set bit of a
2389    number.  If the input number has no bits set -1U is returned.  */
2390 unsigned APInt::tcLSB(const WordType *parts, unsigned n) {
2391   for (unsigned i = 0; i < n; i++) {
2392     if (parts[i] != 0) {
2393       unsigned lsb = partLSB(parts[i]);
2394 
2395       return lsb + i * APINT_BITS_PER_WORD;
2396     }
2397   }
2398 
2399   return -1U;
2400 }
2401 
2402 /* Returns the bit number of the most significant set bit of a number.
2403    If the input number has no bits set -1U is returned.  */
2404 unsigned APInt::tcMSB(const WordType *parts, unsigned n) {
2405   do {
2406     --n;
2407 
2408     if (parts[n] != 0) {
2409       unsigned msb = partMSB(parts[n]);
2410 
2411       return msb + n * APINT_BITS_PER_WORD;
2412     }
2413   } while (n);
2414 
2415   return -1U;
2416 }
2417 
2418 /* Copy the bit vector of width srcBITS from SRC, starting at bit
2419    srcLSB, to DST, of dstCOUNT parts, such that the bit srcLSB becomes
2420    the least significant bit of DST.  All high bits above srcBITS in
2421    DST are zero-filled.  */
2422 void
2423 APInt::tcExtract(WordType *dst, unsigned dstCount, const WordType *src,
2424                  unsigned srcBits, unsigned srcLSB) {
2425   unsigned dstParts = (srcBits + APINT_BITS_PER_WORD - 1) / APINT_BITS_PER_WORD;
2426   assert(dstParts <= dstCount);
2427 
2428   unsigned firstSrcPart = srcLSB / APINT_BITS_PER_WORD;
2429   tcAssign (dst, src + firstSrcPart, dstParts);
2430 
2431   unsigned shift = srcLSB % APINT_BITS_PER_WORD;
2432   tcShiftRight (dst, dstParts, shift);
2433 
2434   /* We now have (dstParts * APINT_BITS_PER_WORD - shift) bits from SRC
2435      in DST.  If this is less that srcBits, append the rest, else
2436      clear the high bits.  */
2437   unsigned n = dstParts * APINT_BITS_PER_WORD - shift;
2438   if (n < srcBits) {
2439     WordType mask = lowBitMask (srcBits - n);
2440     dst[dstParts - 1] |= ((src[firstSrcPart + dstParts] & mask)
2441                           << n % APINT_BITS_PER_WORD);
2442   } else if (n > srcBits) {
2443     if (srcBits % APINT_BITS_PER_WORD)
2444       dst[dstParts - 1] &= lowBitMask (srcBits % APINT_BITS_PER_WORD);
2445   }
2446 
2447   /* Clear high parts.  */
2448   while (dstParts < dstCount)
2449     dst[dstParts++] = 0;
2450 }
2451 
2452 /* DST += RHS + C where C is zero or one.  Returns the carry flag.  */
2453 APInt::WordType APInt::tcAdd(WordType *dst, const WordType *rhs,
2454                              WordType c, unsigned parts) {
2455   assert(c <= 1);
2456 
2457   for (unsigned i = 0; i < parts; i++) {
2458     WordType l = dst[i];
2459     if (c) {
2460       dst[i] += rhs[i] + 1;
2461       c = (dst[i] <= l);
2462     } else {
2463       dst[i] += rhs[i];
2464       c = (dst[i] < l);
2465     }
2466   }
2467 
2468   return c;
2469 }
2470 
2471 /// This function adds a single "word" integer, src, to the multiple
2472 /// "word" integer array, dst[]. dst[] is modified to reflect the addition and
2473 /// 1 is returned if there is a carry out, otherwise 0 is returned.
2474 /// @returns the carry of the addition.
2475 APInt::WordType APInt::tcAddPart(WordType *dst, WordType src,
2476                                  unsigned parts) {
2477   for (unsigned i = 0; i < parts; ++i) {
2478     dst[i] += src;
2479     if (dst[i] >= src)
2480       return 0; // No need to carry so exit early.
2481     src = 1; // Carry one to next digit.
2482   }
2483 
2484   return 1;
2485 }
2486 
2487 /* DST -= RHS + C where C is zero or one.  Returns the carry flag.  */
2488 APInt::WordType APInt::tcSubtract(WordType *dst, const WordType *rhs,
2489                                   WordType c, unsigned parts) {
2490   assert(c <= 1);
2491 
2492   for (unsigned i = 0; i < parts; i++) {
2493     WordType l = dst[i];
2494     if (c) {
2495       dst[i] -= rhs[i] + 1;
2496       c = (dst[i] >= l);
2497     } else {
2498       dst[i] -= rhs[i];
2499       c = (dst[i] > l);
2500     }
2501   }
2502 
2503   return c;
2504 }
2505 
2506 /// This function subtracts a single "word" (64-bit word), src, from
2507 /// the multi-word integer array, dst[], propagating the borrowed 1 value until
2508 /// no further borrowing is needed or it runs out of "words" in dst.  The result
2509 /// is 1 if "borrowing" exhausted the digits in dst, or 0 if dst was not
2510 /// exhausted. In other words, if src > dst then this function returns 1,
2511 /// otherwise 0.
2512 /// @returns the borrow out of the subtraction
2513 APInt::WordType APInt::tcSubtractPart(WordType *dst, WordType src,
2514                                       unsigned parts) {
2515   for (unsigned i = 0; i < parts; ++i) {
2516     WordType Dst = dst[i];
2517     dst[i] -= src;
2518     if (src <= Dst)
2519       return 0; // No need to borrow so exit early.
2520     src = 1; // We have to "borrow 1" from next "word"
2521   }
2522 
2523   return 1;
2524 }
2525 
2526 /* Negate a bignum in-place.  */
2527 void APInt::tcNegate(WordType *dst, unsigned parts) {
2528   tcComplement(dst, parts);
2529   tcIncrement(dst, parts);
2530 }
2531 
2532 /*  DST += SRC * MULTIPLIER + CARRY   if add is true
2533     DST  = SRC * MULTIPLIER + CARRY   if add is false
2534 
2535     Requires 0 <= DSTPARTS <= SRCPARTS + 1.  If DST overlaps SRC
2536     they must start at the same point, i.e. DST == SRC.
2537 
2538     If DSTPARTS == SRCPARTS + 1 no overflow occurs and zero is
2539     returned.  Otherwise DST is filled with the least significant
2540     DSTPARTS parts of the result, and if all of the omitted higher
2541     parts were zero return zero, otherwise overflow occurred and
2542     return one.  */
2543 int APInt::tcMultiplyPart(WordType *dst, const WordType *src,
2544                           WordType multiplier, WordType carry,
2545                           unsigned srcParts, unsigned dstParts,
2546                           bool add) {
2547   /* Otherwise our writes of DST kill our later reads of SRC.  */
2548   assert(dst <= src || dst >= src + srcParts);
2549   assert(dstParts <= srcParts + 1);
2550 
2551   /* N loops; minimum of dstParts and srcParts.  */
2552   unsigned n = std::min(dstParts, srcParts);
2553 
2554   for (unsigned i = 0; i < n; i++) {
2555     WordType low, mid, high, srcPart;
2556 
2557       /* [ LOW, HIGH ] = MULTIPLIER * SRC[i] + DST[i] + CARRY.
2558 
2559          This cannot overflow, because
2560 
2561          (n - 1) * (n - 1) + 2 (n - 1) = (n - 1) * (n + 1)
2562 
2563          which is less than n^2.  */
2564 
2565     srcPart = src[i];
2566 
2567     if (multiplier == 0 || srcPart == 0) {
2568       low = carry;
2569       high = 0;
2570     } else {
2571       low = lowHalf(srcPart) * lowHalf(multiplier);
2572       high = highHalf(srcPart) * highHalf(multiplier);
2573 
2574       mid = lowHalf(srcPart) * highHalf(multiplier);
2575       high += highHalf(mid);
2576       mid <<= APINT_BITS_PER_WORD / 2;
2577       if (low + mid < low)
2578         high++;
2579       low += mid;
2580 
2581       mid = highHalf(srcPart) * lowHalf(multiplier);
2582       high += highHalf(mid);
2583       mid <<= APINT_BITS_PER_WORD / 2;
2584       if (low + mid < low)
2585         high++;
2586       low += mid;
2587 
2588       /* Now add carry.  */
2589       if (low + carry < low)
2590         high++;
2591       low += carry;
2592     }
2593 
2594     if (add) {
2595       /* And now DST[i], and store the new low part there.  */
2596       if (low + dst[i] < low)
2597         high++;
2598       dst[i] += low;
2599     } else
2600       dst[i] = low;
2601 
2602     carry = high;
2603   }
2604 
2605   if (srcParts < dstParts) {
2606     /* Full multiplication, there is no overflow.  */
2607     assert(srcParts + 1 == dstParts);
2608     dst[srcParts] = carry;
2609     return 0;
2610   }
2611 
2612   /* We overflowed if there is carry.  */
2613   if (carry)
2614     return 1;
2615 
2616   /* We would overflow if any significant unwritten parts would be
2617      non-zero.  This is true if any remaining src parts are non-zero
2618      and the multiplier is non-zero.  */
2619   if (multiplier)
2620     for (unsigned i = dstParts; i < srcParts; i++)
2621       if (src[i])
2622         return 1;
2623 
2624   /* We fitted in the narrow destination.  */
2625   return 0;
2626 }
2627 
2628 /* DST = LHS * RHS, where DST has the same width as the operands and
2629    is filled with the least significant parts of the result.  Returns
2630    one if overflow occurred, otherwise zero.  DST must be disjoint
2631    from both operands.  */
2632 int APInt::tcMultiply(WordType *dst, const WordType *lhs,
2633                       const WordType *rhs, unsigned parts) {
2634   assert(dst != lhs && dst != rhs);
2635 
2636   int overflow = 0;
2637   tcSet(dst, 0, parts);
2638 
2639   for (unsigned i = 0; i < parts; i++)
2640     overflow |= tcMultiplyPart(&dst[i], lhs, rhs[i], 0, parts,
2641                                parts - i, true);
2642 
2643   return overflow;
2644 }
2645 
2646 /// DST = LHS * RHS, where DST has width the sum of the widths of the
2647 /// operands. No overflow occurs. DST must be disjoint from both operands.
2648 void APInt::tcFullMultiply(WordType *dst, const WordType *lhs,
2649                            const WordType *rhs, unsigned lhsParts,
2650                            unsigned rhsParts) {
2651   /* Put the narrower number on the LHS for less loops below.  */
2652   if (lhsParts > rhsParts)
2653     return tcFullMultiply (dst, rhs, lhs, rhsParts, lhsParts);
2654 
2655   assert(dst != lhs && dst != rhs);
2656 
2657   tcSet(dst, 0, rhsParts);
2658 
2659   for (unsigned i = 0; i < lhsParts; i++)
2660     tcMultiplyPart(&dst[i], rhs, lhs[i], 0, rhsParts, rhsParts + 1, true);
2661 }
2662 
2663 /* If RHS is zero LHS and REMAINDER are left unchanged, return one.
2664    Otherwise set LHS to LHS / RHS with the fractional part discarded,
2665    set REMAINDER to the remainder, return zero.  i.e.
2666 
2667    OLD_LHS = RHS * LHS + REMAINDER
2668 
2669    SCRATCH is a bignum of the same size as the operands and result for
2670    use by the routine; its contents need not be initialized and are
2671    destroyed.  LHS, REMAINDER and SCRATCH must be distinct.
2672 */
2673 int APInt::tcDivide(WordType *lhs, const WordType *rhs,
2674                     WordType *remainder, WordType *srhs,
2675                     unsigned parts) {
2676   assert(lhs != remainder && lhs != srhs && remainder != srhs);
2677 
2678   unsigned shiftCount = tcMSB(rhs, parts) + 1;
2679   if (shiftCount == 0)
2680     return true;
2681 
2682   shiftCount = parts * APINT_BITS_PER_WORD - shiftCount;
2683   unsigned n = shiftCount / APINT_BITS_PER_WORD;
2684   WordType mask = (WordType) 1 << (shiftCount % APINT_BITS_PER_WORD);
2685 
2686   tcAssign(srhs, rhs, parts);
2687   tcShiftLeft(srhs, parts, shiftCount);
2688   tcAssign(remainder, lhs, parts);
2689   tcSet(lhs, 0, parts);
2690 
2691   /* Loop, subtracting SRHS if REMAINDER is greater and adding that to
2692      the total.  */
2693   for (;;) {
2694     int compare = tcCompare(remainder, srhs, parts);
2695     if (compare >= 0) {
2696       tcSubtract(remainder, srhs, 0, parts);
2697       lhs[n] |= mask;
2698     }
2699 
2700     if (shiftCount == 0)
2701       break;
2702     shiftCount--;
2703     tcShiftRight(srhs, parts, 1);
2704     if ((mask >>= 1) == 0) {
2705       mask = (WordType) 1 << (APINT_BITS_PER_WORD - 1);
2706       n--;
2707     }
2708   }
2709 
2710   return false;
2711 }
2712 
2713 /// Shift a bignum left Cound bits in-place. Shifted in bits are zero. There are
2714 /// no restrictions on Count.
2715 void APInt::tcShiftLeft(WordType *Dst, unsigned Words, unsigned Count) {
2716   // Don't bother performing a no-op shift.
2717   if (!Count)
2718     return;
2719 
2720   // WordShift is the inter-part shift; BitShift is the intra-part shift.
2721   unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words);
2722   unsigned BitShift = Count % APINT_BITS_PER_WORD;
2723 
2724   // Fastpath for moving by whole words.
2725   if (BitShift == 0) {
2726     std::memmove(Dst + WordShift, Dst, (Words - WordShift) * APINT_WORD_SIZE);
2727   } else {
2728     while (Words-- > WordShift) {
2729       Dst[Words] = Dst[Words - WordShift] << BitShift;
2730       if (Words > WordShift)
2731         Dst[Words] |=
2732           Dst[Words - WordShift - 1] >> (APINT_BITS_PER_WORD - BitShift);
2733     }
2734   }
2735 
2736   // Fill in the remainder with 0s.
2737   std::memset(Dst, 0, WordShift * APINT_WORD_SIZE);
2738 }
2739 
2740 /// Shift a bignum right Count bits in-place. Shifted in bits are zero. There
2741 /// are no restrictions on Count.
2742 void APInt::tcShiftRight(WordType *Dst, unsigned Words, unsigned Count) {
2743   // Don't bother performing a no-op shift.
2744   if (!Count)
2745     return;
2746 
2747   // WordShift is the inter-part shift; BitShift is the intra-part shift.
2748   unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words);
2749   unsigned BitShift = Count % APINT_BITS_PER_WORD;
2750 
2751   unsigned WordsToMove = Words - WordShift;
2752   // Fastpath for moving by whole words.
2753   if (BitShift == 0) {
2754     std::memmove(Dst, Dst + WordShift, WordsToMove * APINT_WORD_SIZE);
2755   } else {
2756     for (unsigned i = 0; i != WordsToMove; ++i) {
2757       Dst[i] = Dst[i + WordShift] >> BitShift;
2758       if (i + 1 != WordsToMove)
2759         Dst[i] |= Dst[i + WordShift + 1] << (APINT_BITS_PER_WORD - BitShift);
2760     }
2761   }
2762 
2763   // Fill in the remainder with 0s.
2764   std::memset(Dst + WordsToMove, 0, WordShift * APINT_WORD_SIZE);
2765 }
2766 
2767 /* Bitwise and of two bignums.  */
2768 void APInt::tcAnd(WordType *dst, const WordType *rhs, unsigned parts) {
2769   for (unsigned i = 0; i < parts; i++)
2770     dst[i] &= rhs[i];
2771 }
2772 
2773 /* Bitwise inclusive or of two bignums.  */
2774 void APInt::tcOr(WordType *dst, const WordType *rhs, unsigned parts) {
2775   for (unsigned i = 0; i < parts; i++)
2776     dst[i] |= rhs[i];
2777 }
2778 
2779 /* Bitwise exclusive or of two bignums.  */
2780 void APInt::tcXor(WordType *dst, const WordType *rhs, unsigned parts) {
2781   for (unsigned i = 0; i < parts; i++)
2782     dst[i] ^= rhs[i];
2783 }
2784 
2785 /* Complement a bignum in-place.  */
2786 void APInt::tcComplement(WordType *dst, unsigned parts) {
2787   for (unsigned i = 0; i < parts; i++)
2788     dst[i] = ~dst[i];
2789 }
2790 
2791 /* Comparison (unsigned) of two bignums.  */
2792 int APInt::tcCompare(const WordType *lhs, const WordType *rhs,
2793                      unsigned parts) {
2794   while (parts) {
2795     parts--;
2796     if (lhs[parts] != rhs[parts])
2797       return (lhs[parts] > rhs[parts]) ? 1 : -1;
2798   }
2799 
2800   return 0;
2801 }
2802 
2803 /* Set the least significant BITS bits of a bignum, clear the
2804    rest.  */
2805 void APInt::tcSetLeastSignificantBits(WordType *dst, unsigned parts,
2806                                       unsigned bits) {
2807   unsigned i = 0;
2808   while (bits > APINT_BITS_PER_WORD) {
2809     dst[i++] = ~(WordType) 0;
2810     bits -= APINT_BITS_PER_WORD;
2811   }
2812 
2813   if (bits)
2814     dst[i++] = ~(WordType) 0 >> (APINT_BITS_PER_WORD - bits);
2815 
2816   while (i < parts)
2817     dst[i++] = 0;
2818 }
2819 
2820 APInt llvm::APIntOps::RoundingUDiv(const APInt &A, const APInt &B,
2821                                    APInt::Rounding RM) {
2822   // Currently udivrem always rounds down.
2823   switch (RM) {
2824   case APInt::Rounding::DOWN:
2825   case APInt::Rounding::TOWARD_ZERO:
2826     return A.udiv(B);
2827   case APInt::Rounding::UP: {
2828     APInt Quo, Rem;
2829     APInt::udivrem(A, B, Quo, Rem);
2830     if (Rem == 0)
2831       return Quo;
2832     return Quo + 1;
2833   }
2834   }
2835   llvm_unreachable("Unknown APInt::Rounding enum");
2836 }
2837 
2838 APInt llvm::APIntOps::RoundingSDiv(const APInt &A, const APInt &B,
2839                                    APInt::Rounding RM) {
2840   switch (RM) {
2841   case APInt::Rounding::DOWN:
2842   case APInt::Rounding::UP: {
2843     APInt Quo, Rem;
2844     APInt::sdivrem(A, B, Quo, Rem);
2845     if (Rem == 0)
2846       return Quo;
2847     // This algorithm deals with arbitrary rounding mode used by sdivrem.
2848     // We want to check whether the non-integer part of the mathematical value
2849     // is negative or not. If the non-integer part is negative, we need to round
2850     // down from Quo; otherwise, if it's positive or 0, we return Quo, as it's
2851     // already rounded down.
2852     if (RM == APInt::Rounding::DOWN) {
2853       if (Rem.isNegative() != B.isNegative())
2854         return Quo - 1;
2855       return Quo;
2856     }
2857     if (Rem.isNegative() != B.isNegative())
2858       return Quo;
2859     return Quo + 1;
2860   }
2861   // Currently sdiv rounds towards zero.
2862   case APInt::Rounding::TOWARD_ZERO:
2863     return A.sdiv(B);
2864   }
2865   llvm_unreachable("Unknown APInt::Rounding enum");
2866 }
2867 
2868 Optional<APInt>
2869 llvm::APIntOps::SolveQuadraticEquationWrap(APInt A, APInt B, APInt C,
2870                                            unsigned RangeWidth) {
2871   unsigned CoeffWidth = A.getBitWidth();
2872   assert(CoeffWidth == B.getBitWidth() && CoeffWidth == C.getBitWidth());
2873   assert(RangeWidth <= CoeffWidth &&
2874          "Value range width should be less than coefficient width");
2875   assert(RangeWidth > 1 && "Value range bit width should be > 1");
2876 
2877   LLVM_DEBUG(dbgs() << __func__ << ": solving " << A << "x^2 + " << B
2878                     << "x + " << C << ", rw:" << RangeWidth << '\n');
2879 
2880   // Identify 0 as a (non)solution immediately.
2881   if (C.sextOrTrunc(RangeWidth).isNullValue() ) {
2882     LLVM_DEBUG(dbgs() << __func__ << ": zero solution\n");
2883     return APInt(CoeffWidth, 0);
2884   }
2885 
2886   // The result of APInt arithmetic has the same bit width as the operands,
2887   // so it can actually lose high bits. A product of two n-bit integers needs
2888   // 2n-1 bits to represent the full value.
2889   // The operation done below (on quadratic coefficients) that can produce
2890   // the largest value is the evaluation of the equation during bisection,
2891   // which needs 3 times the bitwidth of the coefficient, so the total number
2892   // of required bits is 3n.
2893   //
2894   // The purpose of this extension is to simulate the set Z of all integers,
2895   // where n+1 > n for all n in Z. In Z it makes sense to talk about positive
2896   // and negative numbers (not so much in a modulo arithmetic). The method
2897   // used to solve the equation is based on the standard formula for real
2898   // numbers, and uses the concepts of "positive" and "negative" with their
2899   // usual meanings.
2900   CoeffWidth *= 3;
2901   A = A.sext(CoeffWidth);
2902   B = B.sext(CoeffWidth);
2903   C = C.sext(CoeffWidth);
2904 
2905   // Make A > 0 for simplicity. Negate cannot overflow at this point because
2906   // the bit width has increased.
2907   if (A.isNegative()) {
2908     A.negate();
2909     B.negate();
2910     C.negate();
2911   }
2912 
2913   // Solving an equation q(x) = 0 with coefficients in modular arithmetic
2914   // is really solving a set of equations q(x) = kR for k = 0, 1, 2, ...,
2915   // and R = 2^BitWidth.
2916   // Since we're trying not only to find exact solutions, but also values
2917   // that "wrap around", such a set will always have a solution, i.e. an x
2918   // that satisfies at least one of the equations, or such that |q(x)|
2919   // exceeds kR, while |q(x-1)| for the same k does not.
2920   //
2921   // We need to find a value k, such that Ax^2 + Bx + C = kR will have a
2922   // positive solution n (in the above sense), and also such that the n
2923   // will be the least among all solutions corresponding to k = 0, 1, ...
2924   // (more precisely, the least element in the set
2925   //   { n(k) | k is such that a solution n(k) exists }).
2926   //
2927   // Consider the parabola (over real numbers) that corresponds to the
2928   // quadratic equation. Since A > 0, the arms of the parabola will point
2929   // up. Picking different values of k will shift it up and down by R.
2930   //
2931   // We want to shift the parabola in such a way as to reduce the problem
2932   // of solving q(x) = kR to solving shifted_q(x) = 0.
2933   // (The interesting solutions are the ceilings of the real number
2934   // solutions.)
2935   APInt R = APInt::getOneBitSet(CoeffWidth, RangeWidth);
2936   APInt TwoA = 2 * A;
2937   APInt SqrB = B * B;
2938   bool PickLow;
2939 
2940   auto RoundUp = [] (const APInt &V, const APInt &A) -> APInt {
2941     assert(A.isStrictlyPositive());
2942     APInt T = V.abs().urem(A);
2943     if (T.isNullValue())
2944       return V;
2945     return V.isNegative() ? V+T : V+(A-T);
2946   };
2947 
2948   // The vertex of the parabola is at -B/2A, but since A > 0, it's negative
2949   // iff B is positive.
2950   if (B.isNonNegative()) {
2951     // If B >= 0, the vertex it at a negative location (or at 0), so in
2952     // order to have a non-negative solution we need to pick k that makes
2953     // C-kR negative. To satisfy all the requirements for the solution
2954     // that we are looking for, it needs to be closest to 0 of all k.
2955     C = C.srem(R);
2956     if (C.isStrictlyPositive())
2957       C -= R;
2958     // Pick the greater solution.
2959     PickLow = false;
2960   } else {
2961     // If B < 0, the vertex is at a positive location. For any solution
2962     // to exist, the discriminant must be non-negative. This means that
2963     // C-kR <= B^2/4A is a necessary condition for k, i.e. there is a
2964     // lower bound on values of k: kR >= C - B^2/4A.
2965     APInt LowkR = C - SqrB.udiv(2*TwoA); // udiv because all values > 0.
2966     // Round LowkR up (towards +inf) to the nearest kR.
2967     LowkR = RoundUp(LowkR, R);
2968 
2969     // If there exists k meeting the condition above, and such that
2970     // C-kR > 0, there will be two positive real number solutions of
2971     // q(x) = kR. Out of all such values of k, pick the one that makes
2972     // C-kR closest to 0, (i.e. pick maximum k such that C-kR > 0).
2973     // In other words, find maximum k such that LowkR <= kR < C.
2974     if (C.sgt(LowkR)) {
2975       // If LowkR < C, then such a k is guaranteed to exist because
2976       // LowkR itself is a multiple of R.
2977       C -= -RoundUp(-C, R);      // C = C - RoundDown(C, R)
2978       // Pick the smaller solution.
2979       PickLow = true;
2980     } else {
2981       // If C-kR < 0 for all potential k's, it means that one solution
2982       // will be negative, while the other will be positive. The positive
2983       // solution will shift towards 0 if the parabola is moved up.
2984       // Pick the kR closest to the lower bound (i.e. make C-kR closest
2985       // to 0, or in other words, out of all parabolas that have solutions,
2986       // pick the one that is the farthest "up").
2987       // Since LowkR is itself a multiple of R, simply take C-LowkR.
2988       C -= LowkR;
2989       // Pick the greater solution.
2990       PickLow = false;
2991     }
2992   }
2993 
2994   LLVM_DEBUG(dbgs() << __func__ << ": updated coefficients " << A << "x^2 + "
2995                     << B << "x + " << C << ", rw:" << RangeWidth << '\n');
2996 
2997   APInt D = SqrB - 4*A*C;
2998   assert(D.isNonNegative() && "Negative discriminant");
2999   APInt SQ = D.sqrt();
3000 
3001   APInt Q = SQ * SQ;
3002   bool InexactSQ = Q != D;
3003   // The calculated SQ may actually be greater than the exact (non-integer)
3004   // value. If that's the case, decrement SQ to get a value that is lower.
3005   if (Q.sgt(D))
3006     SQ -= 1;
3007 
3008   APInt X;
3009   APInt Rem;
3010 
3011   // SQ is rounded down (i.e SQ * SQ <= D), so the roots may be inexact.
3012   // When using the quadratic formula directly, the calculated low root
3013   // may be greater than the exact one, since we would be subtracting SQ.
3014   // To make sure that the calculated root is not greater than the exact
3015   // one, subtract SQ+1 when calculating the low root (for inexact value
3016   // of SQ).
3017   if (PickLow)
3018     APInt::sdivrem(-B - (SQ+InexactSQ), TwoA, X, Rem);
3019   else
3020     APInt::sdivrem(-B + SQ, TwoA, X, Rem);
3021 
3022   // The updated coefficients should be such that the (exact) solution is
3023   // positive. Since APInt division rounds towards 0, the calculated one
3024   // can be 0, but cannot be negative.
3025   assert(X.isNonNegative() && "Solution should be non-negative");
3026 
3027   if (!InexactSQ && Rem.isNullValue()) {
3028     LLVM_DEBUG(dbgs() << __func__ << ": solution (root): " << X << '\n');
3029     return X;
3030   }
3031 
3032   assert((SQ*SQ).sle(D) && "SQ = |_sqrt(D)_|, so SQ*SQ <= D");
3033   // The exact value of the square root of D should be between SQ and SQ+1.
3034   // This implies that the solution should be between that corresponding to
3035   // SQ (i.e. X) and that corresponding to SQ+1.
3036   //
3037   // The calculated X cannot be greater than the exact (real) solution.
3038   // Actually it must be strictly less than the exact solution, while
3039   // X+1 will be greater than or equal to it.
3040 
3041   APInt VX = (A*X + B)*X + C;
3042   APInt VY = VX + TwoA*X + A + B;
3043   bool SignChange = VX.isNegative() != VY.isNegative() ||
3044                     VX.isNullValue() != VY.isNullValue();
3045   // If the sign did not change between X and X+1, X is not a valid solution.
3046   // This could happen when the actual (exact) roots don't have an integer
3047   // between them, so they would both be contained between X and X+1.
3048   if (!SignChange) {
3049     LLVM_DEBUG(dbgs() << __func__ << ": no valid solution\n");
3050     return None;
3051   }
3052 
3053   X += 1;
3054   LLVM_DEBUG(dbgs() << __func__ << ": solution (wrap): " << X << '\n');
3055   return X;
3056 }
3057 
3058 Optional<unsigned>
3059 llvm::APIntOps::GetMostSignificantDifferentBit(const APInt &A, const APInt &B) {
3060   assert(A.getBitWidth() == B.getBitWidth() && "Must have the same bitwidth");
3061   if (A == B)
3062     return llvm::None;
3063   return A.getBitWidth() - ((A ^ B).countLeadingZeros() + 1);
3064 }
3065 
3066 /// StoreIntToMemory - Fills the StoreBytes bytes of memory starting from Dst
3067 /// with the integer held in IntVal.
3068 void llvm::StoreIntToMemory(const APInt &IntVal, uint8_t *Dst,
3069                             unsigned StoreBytes) {
3070   assert((IntVal.getBitWidth()+7)/8 >= StoreBytes && "Integer too small!");
3071   const uint8_t *Src = (const uint8_t *)IntVal.getRawData();
3072 
3073   if (sys::IsLittleEndianHost) {
3074     // Little-endian host - the source is ordered from LSB to MSB.  Order the
3075     // destination from LSB to MSB: Do a straight copy.
3076     memcpy(Dst, Src, StoreBytes);
3077   } else {
3078     // Big-endian host - the source is an array of 64 bit words ordered from
3079     // LSW to MSW.  Each word is ordered from MSB to LSB.  Order the destination
3080     // from MSB to LSB: Reverse the word order, but not the bytes in a word.
3081     while (StoreBytes > sizeof(uint64_t)) {
3082       StoreBytes -= sizeof(uint64_t);
3083       // May not be aligned so use memcpy.
3084       memcpy(Dst + StoreBytes, Src, sizeof(uint64_t));
3085       Src += sizeof(uint64_t);
3086     }
3087 
3088     memcpy(Dst, Src + sizeof(uint64_t) - StoreBytes, StoreBytes);
3089   }
3090 }
3091 
3092 /// LoadIntFromMemory - Loads the integer stored in the LoadBytes bytes starting
3093 /// from Src into IntVal, which is assumed to be wide enough and to hold zero.
3094 void llvm::LoadIntFromMemory(APInt &IntVal, const uint8_t *Src,
3095                              unsigned LoadBytes) {
3096   assert((IntVal.getBitWidth()+7)/8 >= LoadBytes && "Integer too small!");
3097   uint8_t *Dst = reinterpret_cast<uint8_t *>(
3098                    const_cast<uint64_t *>(IntVal.getRawData()));
3099 
3100   if (sys::IsLittleEndianHost)
3101     // Little-endian host - the destination must be ordered from LSB to MSB.
3102     // The source is ordered from LSB to MSB: Do a straight copy.
3103     memcpy(Dst, Src, LoadBytes);
3104   else {
3105     // Big-endian - the destination is an array of 64 bit words ordered from
3106     // LSW to MSW.  Each word must be ordered from MSB to LSB.  The source is
3107     // ordered from MSB to LSB: Reverse the word order, but not the bytes in
3108     // a word.
3109     while (LoadBytes > sizeof(uint64_t)) {
3110       LoadBytes -= sizeof(uint64_t);
3111       // May not be aligned so use memcpy.
3112       memcpy(Dst, Src + LoadBytes, sizeof(uint64_t));
3113       Dst += sizeof(uint64_t);
3114     }
3115 
3116     memcpy(Dst + sizeof(uint64_t) - LoadBytes, Src, LoadBytes);
3117   }
3118 }
3119