1 //===-- APInt.cpp - Implement APInt class ---------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a class to represent arbitrary precision integer
10 // constant values and provide a variety of arithmetic operations on them.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/ADT/APInt.h"
15 #include "llvm/ADT/ArrayRef.h"
16 #include "llvm/ADT/FoldingSet.h"
17 #include "llvm/ADT/Hashing.h"
18 #include "llvm/ADT/Optional.h"
19 #include "llvm/ADT/SmallString.h"
20 #include "llvm/ADT/StringRef.h"
21 #include "llvm/ADT/bit.h"
22 #include "llvm/Config/llvm-config.h"
23 #include "llvm/Support/Debug.h"
24 #include "llvm/Support/ErrorHandling.h"
25 #include "llvm/Support/MathExtras.h"
26 #include "llvm/Support/raw_ostream.h"
27 #include <climits>
28 #include <cmath>
29 #include <cstdlib>
30 #include <cstring>
31 using namespace llvm;
32 
33 #define DEBUG_TYPE "apint"
34 
35 /// A utility function for allocating memory, checking for allocation failures,
36 /// and ensuring the contents are zeroed.
37 inline static uint64_t* getClearedMemory(unsigned numWords) {
38   uint64_t *result = new uint64_t[numWords];
39   memset(result, 0, numWords * sizeof(uint64_t));
40   return result;
41 }
42 
43 /// A utility function for allocating memory and checking for allocation
44 /// failure.  The content is not zeroed.
45 inline static uint64_t* getMemory(unsigned numWords) {
46   return new uint64_t[numWords];
47 }
48 
49 /// A utility function that converts a character to a digit.
50 inline static unsigned getDigit(char cdigit, uint8_t radix) {
51   unsigned r;
52 
53   if (radix == 16 || radix == 36) {
54     r = cdigit - '0';
55     if (r <= 9)
56       return r;
57 
58     r = cdigit - 'A';
59     if (r <= radix - 11U)
60       return r + 10;
61 
62     r = cdigit - 'a';
63     if (r <= radix - 11U)
64       return r + 10;
65 
66     radix = 10;
67   }
68 
69   r = cdigit - '0';
70   if (r < radix)
71     return r;
72 
73   return -1U;
74 }
75 
76 
77 void APInt::initSlowCase(uint64_t val, bool isSigned) {
78   U.pVal = getClearedMemory(getNumWords());
79   U.pVal[0] = val;
80   if (isSigned && int64_t(val) < 0)
81     for (unsigned i = 1; i < getNumWords(); ++i)
82       U.pVal[i] = WORDTYPE_MAX;
83   clearUnusedBits();
84 }
85 
86 void APInt::initSlowCase(const APInt& that) {
87   U.pVal = getMemory(getNumWords());
88   memcpy(U.pVal, that.U.pVal, getNumWords() * APINT_WORD_SIZE);
89 }
90 
91 void APInt::initFromArray(ArrayRef<uint64_t> bigVal) {
92   assert(BitWidth && "Bitwidth too small");
93   assert(bigVal.data() && "Null pointer detected!");
94   if (isSingleWord())
95     U.VAL = bigVal[0];
96   else {
97     // Get memory, cleared to 0
98     U.pVal = getClearedMemory(getNumWords());
99     // Calculate the number of words to copy
100     unsigned words = std::min<unsigned>(bigVal.size(), getNumWords());
101     // Copy the words from bigVal to pVal
102     memcpy(U.pVal, bigVal.data(), words * APINT_WORD_SIZE);
103   }
104   // Make sure unused high bits are cleared
105   clearUnusedBits();
106 }
107 
108 APInt::APInt(unsigned numBits, ArrayRef<uint64_t> bigVal)
109   : BitWidth(numBits) {
110   initFromArray(bigVal);
111 }
112 
113 APInt::APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[])
114   : BitWidth(numBits) {
115   initFromArray(makeArrayRef(bigVal, numWords));
116 }
117 
118 APInt::APInt(unsigned numbits, StringRef Str, uint8_t radix)
119   : BitWidth(numbits) {
120   assert(BitWidth && "Bitwidth too small");
121   fromString(numbits, Str, radix);
122 }
123 
124 void APInt::reallocate(unsigned NewBitWidth) {
125   // If the number of words is the same we can just change the width and stop.
126   if (getNumWords() == getNumWords(NewBitWidth)) {
127     BitWidth = NewBitWidth;
128     return;
129   }
130 
131   // If we have an allocation, delete it.
132   if (!isSingleWord())
133     delete [] U.pVal;
134 
135   // Update BitWidth.
136   BitWidth = NewBitWidth;
137 
138   // If we are supposed to have an allocation, create it.
139   if (!isSingleWord())
140     U.pVal = getMemory(getNumWords());
141 }
142 
143 void APInt::AssignSlowCase(const APInt& RHS) {
144   // Don't do anything for X = X
145   if (this == &RHS)
146     return;
147 
148   // Adjust the bit width and handle allocations as necessary.
149   reallocate(RHS.getBitWidth());
150 
151   // Copy the data.
152   if (isSingleWord())
153     U.VAL = RHS.U.VAL;
154   else
155     memcpy(U.pVal, RHS.U.pVal, getNumWords() * APINT_WORD_SIZE);
156 }
157 
158 /// This method 'profiles' an APInt for use with FoldingSet.
159 void APInt::Profile(FoldingSetNodeID& ID) const {
160   ID.AddInteger(BitWidth);
161 
162   if (isSingleWord()) {
163     ID.AddInteger(U.VAL);
164     return;
165   }
166 
167   unsigned NumWords = getNumWords();
168   for (unsigned i = 0; i < NumWords; ++i)
169     ID.AddInteger(U.pVal[i]);
170 }
171 
172 /// Prefix increment operator. Increments the APInt by one.
173 APInt& APInt::operator++() {
174   if (isSingleWord())
175     ++U.VAL;
176   else
177     tcIncrement(U.pVal, getNumWords());
178   return clearUnusedBits();
179 }
180 
181 /// Prefix decrement operator. Decrements the APInt by one.
182 APInt& APInt::operator--() {
183   if (isSingleWord())
184     --U.VAL;
185   else
186     tcDecrement(U.pVal, getNumWords());
187   return clearUnusedBits();
188 }
189 
190 /// Adds the RHS APint to this APInt.
191 /// @returns this, after addition of RHS.
192 /// Addition assignment operator.
193 APInt& APInt::operator+=(const APInt& RHS) {
194   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
195   if (isSingleWord())
196     U.VAL += RHS.U.VAL;
197   else
198     tcAdd(U.pVal, RHS.U.pVal, 0, getNumWords());
199   return clearUnusedBits();
200 }
201 
202 APInt& APInt::operator+=(uint64_t RHS) {
203   if (isSingleWord())
204     U.VAL += RHS;
205   else
206     tcAddPart(U.pVal, RHS, getNumWords());
207   return clearUnusedBits();
208 }
209 
210 /// Subtracts the RHS APInt from this APInt
211 /// @returns this, after subtraction
212 /// Subtraction assignment operator.
213 APInt& APInt::operator-=(const APInt& RHS) {
214   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
215   if (isSingleWord())
216     U.VAL -= RHS.U.VAL;
217   else
218     tcSubtract(U.pVal, RHS.U.pVal, 0, getNumWords());
219   return clearUnusedBits();
220 }
221 
222 APInt& APInt::operator-=(uint64_t RHS) {
223   if (isSingleWord())
224     U.VAL -= RHS;
225   else
226     tcSubtractPart(U.pVal, RHS, getNumWords());
227   return clearUnusedBits();
228 }
229 
230 APInt APInt::operator*(const APInt& RHS) const {
231   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
232   if (isSingleWord())
233     return APInt(BitWidth, U.VAL * RHS.U.VAL);
234 
235   APInt Result(getMemory(getNumWords()), getBitWidth());
236 
237   tcMultiply(Result.U.pVal, U.pVal, RHS.U.pVal, getNumWords());
238 
239   Result.clearUnusedBits();
240   return Result;
241 }
242 
243 void APInt::AndAssignSlowCase(const APInt& RHS) {
244   tcAnd(U.pVal, RHS.U.pVal, getNumWords());
245 }
246 
247 void APInt::OrAssignSlowCase(const APInt& RHS) {
248   tcOr(U.pVal, RHS.U.pVal, getNumWords());
249 }
250 
251 void APInt::XorAssignSlowCase(const APInt& RHS) {
252   tcXor(U.pVal, RHS.U.pVal, getNumWords());
253 }
254 
255 APInt& APInt::operator*=(const APInt& RHS) {
256   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
257   *this = *this * RHS;
258   return *this;
259 }
260 
261 APInt& APInt::operator*=(uint64_t RHS) {
262   if (isSingleWord()) {
263     U.VAL *= RHS;
264   } else {
265     unsigned NumWords = getNumWords();
266     tcMultiplyPart(U.pVal, U.pVal, RHS, 0, NumWords, NumWords, false);
267   }
268   return clearUnusedBits();
269 }
270 
271 bool APInt::EqualSlowCase(const APInt& RHS) const {
272   return std::equal(U.pVal, U.pVal + getNumWords(), RHS.U.pVal);
273 }
274 
275 int APInt::compare(const APInt& RHS) const {
276   assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
277   if (isSingleWord())
278     return U.VAL < RHS.U.VAL ? -1 : U.VAL > RHS.U.VAL;
279 
280   return tcCompare(U.pVal, RHS.U.pVal, getNumWords());
281 }
282 
283 int APInt::compareSigned(const APInt& RHS) const {
284   assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
285   if (isSingleWord()) {
286     int64_t lhsSext = SignExtend64(U.VAL, BitWidth);
287     int64_t rhsSext = SignExtend64(RHS.U.VAL, BitWidth);
288     return lhsSext < rhsSext ? -1 : lhsSext > rhsSext;
289   }
290 
291   bool lhsNeg = isNegative();
292   bool rhsNeg = RHS.isNegative();
293 
294   // If the sign bits don't match, then (LHS < RHS) if LHS is negative
295   if (lhsNeg != rhsNeg)
296     return lhsNeg ? -1 : 1;
297 
298   // Otherwise we can just use an unsigned comparison, because even negative
299   // numbers compare correctly this way if both have the same signed-ness.
300   return tcCompare(U.pVal, RHS.U.pVal, getNumWords());
301 }
302 
303 void APInt::setBitsSlowCase(unsigned loBit, unsigned hiBit) {
304   unsigned loWord = whichWord(loBit);
305   unsigned hiWord = whichWord(hiBit);
306 
307   // Create an initial mask for the low word with zeros below loBit.
308   uint64_t loMask = WORDTYPE_MAX << whichBit(loBit);
309 
310   // If hiBit is not aligned, we need a high mask.
311   unsigned hiShiftAmt = whichBit(hiBit);
312   if (hiShiftAmt != 0) {
313     // Create a high mask with zeros above hiBit.
314     uint64_t hiMask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - hiShiftAmt);
315     // If loWord and hiWord are equal, then we combine the masks. Otherwise,
316     // set the bits in hiWord.
317     if (hiWord == loWord)
318       loMask &= hiMask;
319     else
320       U.pVal[hiWord] |= hiMask;
321   }
322   // Apply the mask to the low word.
323   U.pVal[loWord] |= loMask;
324 
325   // Fill any words between loWord and hiWord with all ones.
326   for (unsigned word = loWord + 1; word < hiWord; ++word)
327     U.pVal[word] = WORDTYPE_MAX;
328 }
329 
330 /// Toggle every bit to its opposite value.
331 void APInt::flipAllBitsSlowCase() {
332   tcComplement(U.pVal, getNumWords());
333   clearUnusedBits();
334 }
335 
336 /// Toggle a given bit to its opposite value whose position is given
337 /// as "bitPosition".
338 /// Toggles a given bit to its opposite value.
339 void APInt::flipBit(unsigned bitPosition) {
340   assert(bitPosition < BitWidth && "Out of the bit-width range!");
341   if ((*this)[bitPosition]) clearBit(bitPosition);
342   else setBit(bitPosition);
343 }
344 
345 void APInt::insertBits(const APInt &subBits, unsigned bitPosition) {
346   unsigned subBitWidth = subBits.getBitWidth();
347   assert(0 < subBitWidth && (subBitWidth + bitPosition) <= BitWidth &&
348          "Illegal bit insertion");
349 
350   // Insertion is a direct copy.
351   if (subBitWidth == BitWidth) {
352     *this = subBits;
353     return;
354   }
355 
356   // Single word result can be done as a direct bitmask.
357   if (isSingleWord()) {
358     uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - subBitWidth);
359     U.VAL &= ~(mask << bitPosition);
360     U.VAL |= (subBits.U.VAL << bitPosition);
361     return;
362   }
363 
364   unsigned loBit = whichBit(bitPosition);
365   unsigned loWord = whichWord(bitPosition);
366   unsigned hi1Word = whichWord(bitPosition + subBitWidth - 1);
367 
368   // Insertion within a single word can be done as a direct bitmask.
369   if (loWord == hi1Word) {
370     uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - subBitWidth);
371     U.pVal[loWord] &= ~(mask << loBit);
372     U.pVal[loWord] |= (subBits.U.VAL << loBit);
373     return;
374   }
375 
376   // Insert on word boundaries.
377   if (loBit == 0) {
378     // Direct copy whole words.
379     unsigned numWholeSubWords = subBitWidth / APINT_BITS_PER_WORD;
380     memcpy(U.pVal + loWord, subBits.getRawData(),
381            numWholeSubWords * APINT_WORD_SIZE);
382 
383     // Mask+insert remaining bits.
384     unsigned remainingBits = subBitWidth % APINT_BITS_PER_WORD;
385     if (remainingBits != 0) {
386       uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - remainingBits);
387       U.pVal[hi1Word] &= ~mask;
388       U.pVal[hi1Word] |= subBits.getWord(subBitWidth - 1);
389     }
390     return;
391   }
392 
393   // General case - set/clear individual bits in dst based on src.
394   // TODO - there is scope for optimization here, but at the moment this code
395   // path is barely used so prefer readability over performance.
396   for (unsigned i = 0; i != subBitWidth; ++i) {
397     if (subBits[i])
398       setBit(bitPosition + i);
399     else
400       clearBit(bitPosition + i);
401   }
402 }
403 
404 APInt APInt::extractBits(unsigned numBits, unsigned bitPosition) const {
405   assert(numBits > 0 && "Can't extract zero bits");
406   assert(bitPosition < BitWidth && (numBits + bitPosition) <= BitWidth &&
407          "Illegal bit extraction");
408 
409   if (isSingleWord())
410     return APInt(numBits, U.VAL >> bitPosition);
411 
412   unsigned loBit = whichBit(bitPosition);
413   unsigned loWord = whichWord(bitPosition);
414   unsigned hiWord = whichWord(bitPosition + numBits - 1);
415 
416   // Single word result extracting bits from a single word source.
417   if (loWord == hiWord)
418     return APInt(numBits, U.pVal[loWord] >> loBit);
419 
420   // Extracting bits that start on a source word boundary can be done
421   // as a fast memory copy.
422   if (loBit == 0)
423     return APInt(numBits, makeArrayRef(U.pVal + loWord, 1 + hiWord - loWord));
424 
425   // General case - shift + copy source words directly into place.
426   APInt Result(numBits, 0);
427   unsigned NumSrcWords = getNumWords();
428   unsigned NumDstWords = Result.getNumWords();
429 
430   uint64_t *DestPtr = Result.isSingleWord() ? &Result.U.VAL : Result.U.pVal;
431   for (unsigned word = 0; word < NumDstWords; ++word) {
432     uint64_t w0 = U.pVal[loWord + word];
433     uint64_t w1 =
434         (loWord + word + 1) < NumSrcWords ? U.pVal[loWord + word + 1] : 0;
435     DestPtr[word] = (w0 >> loBit) | (w1 << (APINT_BITS_PER_WORD - loBit));
436   }
437 
438   return Result.clearUnusedBits();
439 }
440 
441 unsigned APInt::getBitsNeeded(StringRef str, uint8_t radix) {
442   assert(!str.empty() && "Invalid string length");
443   assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 ||
444           radix == 36) &&
445          "Radix should be 2, 8, 10, 16, or 36!");
446 
447   size_t slen = str.size();
448 
449   // Each computation below needs to know if it's negative.
450   StringRef::iterator p = str.begin();
451   unsigned isNegative = *p == '-';
452   if (*p == '-' || *p == '+') {
453     p++;
454     slen--;
455     assert(slen && "String is only a sign, needs a value.");
456   }
457 
458   // For radixes of power-of-two values, the bits required is accurately and
459   // easily computed
460   if (radix == 2)
461     return slen + isNegative;
462   if (radix == 8)
463     return slen * 3 + isNegative;
464   if (radix == 16)
465     return slen * 4 + isNegative;
466 
467   // FIXME: base 36
468 
469   // This is grossly inefficient but accurate. We could probably do something
470   // with a computation of roughly slen*64/20 and then adjust by the value of
471   // the first few digits. But, I'm not sure how accurate that could be.
472 
473   // Compute a sufficient number of bits that is always large enough but might
474   // be too large. This avoids the assertion in the constructor. This
475   // calculation doesn't work appropriately for the numbers 0-9, so just use 4
476   // bits in that case.
477   unsigned sufficient
478     = radix == 10? (slen == 1 ? 4 : slen * 64/18)
479                  : (slen == 1 ? 7 : slen * 16/3);
480 
481   // Convert to the actual binary value.
482   APInt tmp(sufficient, StringRef(p, slen), radix);
483 
484   // Compute how many bits are required. If the log is infinite, assume we need
485   // just bit.
486   unsigned log = tmp.logBase2();
487   if (log == (unsigned)-1) {
488     return isNegative + 1;
489   } else {
490     return isNegative + log + 1;
491   }
492 }
493 
494 hash_code llvm::hash_value(const APInt &Arg) {
495   if (Arg.isSingleWord())
496     return hash_combine(Arg.U.VAL);
497 
498   return hash_combine_range(Arg.U.pVal, Arg.U.pVal + Arg.getNumWords());
499 }
500 
501 bool APInt::isSplat(unsigned SplatSizeInBits) const {
502   assert(getBitWidth() % SplatSizeInBits == 0 &&
503          "SplatSizeInBits must divide width!");
504   // We can check that all parts of an integer are equal by making use of a
505   // little trick: rotate and check if it's still the same value.
506   return *this == rotl(SplatSizeInBits);
507 }
508 
509 /// This function returns the high "numBits" bits of this APInt.
510 APInt APInt::getHiBits(unsigned numBits) const {
511   return this->lshr(BitWidth - numBits);
512 }
513 
514 /// This function returns the low "numBits" bits of this APInt.
515 APInt APInt::getLoBits(unsigned numBits) const {
516   APInt Result(getLowBitsSet(BitWidth, numBits));
517   Result &= *this;
518   return Result;
519 }
520 
521 /// Return a value containing V broadcasted over NewLen bits.
522 APInt APInt::getSplat(unsigned NewLen, const APInt &V) {
523   assert(NewLen >= V.getBitWidth() && "Can't splat to smaller bit width!");
524 
525   APInt Val = V.zextOrSelf(NewLen);
526   for (unsigned I = V.getBitWidth(); I < NewLen; I <<= 1)
527     Val |= Val << I;
528 
529   return Val;
530 }
531 
532 unsigned APInt::countLeadingZerosSlowCase() const {
533   unsigned Count = 0;
534   for (int i = getNumWords()-1; i >= 0; --i) {
535     uint64_t V = U.pVal[i];
536     if (V == 0)
537       Count += APINT_BITS_PER_WORD;
538     else {
539       Count += llvm::countLeadingZeros(V);
540       break;
541     }
542   }
543   // Adjust for unused bits in the most significant word (they are zero).
544   unsigned Mod = BitWidth % APINT_BITS_PER_WORD;
545   Count -= Mod > 0 ? APINT_BITS_PER_WORD - Mod : 0;
546   return Count;
547 }
548 
549 unsigned APInt::countLeadingOnesSlowCase() const {
550   unsigned highWordBits = BitWidth % APINT_BITS_PER_WORD;
551   unsigned shift;
552   if (!highWordBits) {
553     highWordBits = APINT_BITS_PER_WORD;
554     shift = 0;
555   } else {
556     shift = APINT_BITS_PER_WORD - highWordBits;
557   }
558   int i = getNumWords() - 1;
559   unsigned Count = llvm::countLeadingOnes(U.pVal[i] << shift);
560   if (Count == highWordBits) {
561     for (i--; i >= 0; --i) {
562       if (U.pVal[i] == WORDTYPE_MAX)
563         Count += APINT_BITS_PER_WORD;
564       else {
565         Count += llvm::countLeadingOnes(U.pVal[i]);
566         break;
567       }
568     }
569   }
570   return Count;
571 }
572 
573 unsigned APInt::countTrailingZerosSlowCase() const {
574   unsigned Count = 0;
575   unsigned i = 0;
576   for (; i < getNumWords() && U.pVal[i] == 0; ++i)
577     Count += APINT_BITS_PER_WORD;
578   if (i < getNumWords())
579     Count += llvm::countTrailingZeros(U.pVal[i]);
580   return std::min(Count, BitWidth);
581 }
582 
583 unsigned APInt::countTrailingOnesSlowCase() const {
584   unsigned Count = 0;
585   unsigned i = 0;
586   for (; i < getNumWords() && U.pVal[i] == WORDTYPE_MAX; ++i)
587     Count += APINT_BITS_PER_WORD;
588   if (i < getNumWords())
589     Count += llvm::countTrailingOnes(U.pVal[i]);
590   assert(Count <= BitWidth);
591   return Count;
592 }
593 
594 unsigned APInt::countPopulationSlowCase() const {
595   unsigned Count = 0;
596   for (unsigned i = 0; i < getNumWords(); ++i)
597     Count += llvm::countPopulation(U.pVal[i]);
598   return Count;
599 }
600 
601 bool APInt::intersectsSlowCase(const APInt &RHS) const {
602   for (unsigned i = 0, e = getNumWords(); i != e; ++i)
603     if ((U.pVal[i] & RHS.U.pVal[i]) != 0)
604       return true;
605 
606   return false;
607 }
608 
609 bool APInt::isSubsetOfSlowCase(const APInt &RHS) const {
610   for (unsigned i = 0, e = getNumWords(); i != e; ++i)
611     if ((U.pVal[i] & ~RHS.U.pVal[i]) != 0)
612       return false;
613 
614   return true;
615 }
616 
617 APInt APInt::byteSwap() const {
618   assert(BitWidth >= 16 && BitWidth % 16 == 0 && "Cannot byteswap!");
619   if (BitWidth == 16)
620     return APInt(BitWidth, ByteSwap_16(uint16_t(U.VAL)));
621   if (BitWidth == 32)
622     return APInt(BitWidth, ByteSwap_32(unsigned(U.VAL)));
623   if (BitWidth == 48) {
624     unsigned Tmp1 = unsigned(U.VAL >> 16);
625     Tmp1 = ByteSwap_32(Tmp1);
626     uint16_t Tmp2 = uint16_t(U.VAL);
627     Tmp2 = ByteSwap_16(Tmp2);
628     return APInt(BitWidth, (uint64_t(Tmp2) << 32) | Tmp1);
629   }
630   if (BitWidth == 64)
631     return APInt(BitWidth, ByteSwap_64(U.VAL));
632 
633   APInt Result(getNumWords() * APINT_BITS_PER_WORD, 0);
634   for (unsigned I = 0, N = getNumWords(); I != N; ++I)
635     Result.U.pVal[I] = ByteSwap_64(U.pVal[N - I - 1]);
636   if (Result.BitWidth != BitWidth) {
637     Result.lshrInPlace(Result.BitWidth - BitWidth);
638     Result.BitWidth = BitWidth;
639   }
640   return Result;
641 }
642 
643 APInt APInt::reverseBits() const {
644   switch (BitWidth) {
645   case 64:
646     return APInt(BitWidth, llvm::reverseBits<uint64_t>(U.VAL));
647   case 32:
648     return APInt(BitWidth, llvm::reverseBits<uint32_t>(U.VAL));
649   case 16:
650     return APInt(BitWidth, llvm::reverseBits<uint16_t>(U.VAL));
651   case 8:
652     return APInt(BitWidth, llvm::reverseBits<uint8_t>(U.VAL));
653   default:
654     break;
655   }
656 
657   APInt Val(*this);
658   APInt Reversed(BitWidth, 0);
659   unsigned S = BitWidth;
660 
661   for (; Val != 0; Val.lshrInPlace(1)) {
662     Reversed <<= 1;
663     Reversed |= Val[0];
664     --S;
665   }
666 
667   Reversed <<= S;
668   return Reversed;
669 }
670 
671 APInt llvm::APIntOps::GreatestCommonDivisor(APInt A, APInt B) {
672   // Fast-path a common case.
673   if (A == B) return A;
674 
675   // Corner cases: if either operand is zero, the other is the gcd.
676   if (!A) return B;
677   if (!B) return A;
678 
679   // Count common powers of 2 and remove all other powers of 2.
680   unsigned Pow2;
681   {
682     unsigned Pow2_A = A.countTrailingZeros();
683     unsigned Pow2_B = B.countTrailingZeros();
684     if (Pow2_A > Pow2_B) {
685       A.lshrInPlace(Pow2_A - Pow2_B);
686       Pow2 = Pow2_B;
687     } else if (Pow2_B > Pow2_A) {
688       B.lshrInPlace(Pow2_B - Pow2_A);
689       Pow2 = Pow2_A;
690     } else {
691       Pow2 = Pow2_A;
692     }
693   }
694 
695   // Both operands are odd multiples of 2^Pow_2:
696   //
697   //   gcd(a, b) = gcd(|a - b| / 2^i, min(a, b))
698   //
699   // This is a modified version of Stein's algorithm, taking advantage of
700   // efficient countTrailingZeros().
701   while (A != B) {
702     if (A.ugt(B)) {
703       A -= B;
704       A.lshrInPlace(A.countTrailingZeros() - Pow2);
705     } else {
706       B -= A;
707       B.lshrInPlace(B.countTrailingZeros() - Pow2);
708     }
709   }
710 
711   return A;
712 }
713 
714 APInt llvm::APIntOps::RoundDoubleToAPInt(double Double, unsigned width) {
715   uint64_t I = bit_cast<uint64_t>(Double);
716 
717   // Get the sign bit from the highest order bit
718   bool isNeg = I >> 63;
719 
720   // Get the 11-bit exponent and adjust for the 1023 bit bias
721   int64_t exp = ((I >> 52) & 0x7ff) - 1023;
722 
723   // If the exponent is negative, the value is < 0 so just return 0.
724   if (exp < 0)
725     return APInt(width, 0u);
726 
727   // Extract the mantissa by clearing the top 12 bits (sign + exponent).
728   uint64_t mantissa = (I & (~0ULL >> 12)) | 1ULL << 52;
729 
730   // If the exponent doesn't shift all bits out of the mantissa
731   if (exp < 52)
732     return isNeg ? -APInt(width, mantissa >> (52 - exp)) :
733                     APInt(width, mantissa >> (52 - exp));
734 
735   // If the client didn't provide enough bits for us to shift the mantissa into
736   // then the result is undefined, just return 0
737   if (width <= exp - 52)
738     return APInt(width, 0);
739 
740   // Otherwise, we have to shift the mantissa bits up to the right location
741   APInt Tmp(width, mantissa);
742   Tmp <<= (unsigned)exp - 52;
743   return isNeg ? -Tmp : Tmp;
744 }
745 
746 /// This function converts this APInt to a double.
747 /// The layout for double is as following (IEEE Standard 754):
748 ///  --------------------------------------
749 /// |  Sign    Exponent    Fraction    Bias |
750 /// |-------------------------------------- |
751 /// |  1[63]   11[62-52]   52[51-00]   1023 |
752 ///  --------------------------------------
753 double APInt::roundToDouble(bool isSigned) const {
754 
755   // Handle the simple case where the value is contained in one uint64_t.
756   // It is wrong to optimize getWord(0) to VAL; there might be more than one word.
757   if (isSingleWord() || getActiveBits() <= APINT_BITS_PER_WORD) {
758     if (isSigned) {
759       int64_t sext = SignExtend64(getWord(0), BitWidth);
760       return double(sext);
761     } else
762       return double(getWord(0));
763   }
764 
765   // Determine if the value is negative.
766   bool isNeg = isSigned ? (*this)[BitWidth-1] : false;
767 
768   // Construct the absolute value if we're negative.
769   APInt Tmp(isNeg ? -(*this) : (*this));
770 
771   // Figure out how many bits we're using.
772   unsigned n = Tmp.getActiveBits();
773 
774   // The exponent (without bias normalization) is just the number of bits
775   // we are using. Note that the sign bit is gone since we constructed the
776   // absolute value.
777   uint64_t exp = n;
778 
779   // Return infinity for exponent overflow
780   if (exp > 1023) {
781     if (!isSigned || !isNeg)
782       return std::numeric_limits<double>::infinity();
783     else
784       return -std::numeric_limits<double>::infinity();
785   }
786   exp += 1023; // Increment for 1023 bias
787 
788   // Number of bits in mantissa is 52. To obtain the mantissa value, we must
789   // extract the high 52 bits from the correct words in pVal.
790   uint64_t mantissa;
791   unsigned hiWord = whichWord(n-1);
792   if (hiWord == 0) {
793     mantissa = Tmp.U.pVal[0];
794     if (n > 52)
795       mantissa >>= n - 52; // shift down, we want the top 52 bits.
796   } else {
797     assert(hiWord > 0 && "huh?");
798     uint64_t hibits = Tmp.U.pVal[hiWord] << (52 - n % APINT_BITS_PER_WORD);
799     uint64_t lobits = Tmp.U.pVal[hiWord-1] >> (11 + n % APINT_BITS_PER_WORD);
800     mantissa = hibits | lobits;
801   }
802 
803   // The leading bit of mantissa is implicit, so get rid of it.
804   uint64_t sign = isNeg ? (1ULL << (APINT_BITS_PER_WORD - 1)) : 0;
805   uint64_t I = sign | (exp << 52) | mantissa;
806   return bit_cast<double>(I);
807 }
808 
809 // Truncate to new width.
810 APInt APInt::trunc(unsigned width) const {
811   assert(width < BitWidth && "Invalid APInt Truncate request");
812   assert(width && "Can't truncate to 0 bits");
813 
814   if (width <= APINT_BITS_PER_WORD)
815     return APInt(width, getRawData()[0]);
816 
817   APInt Result(getMemory(getNumWords(width)), width);
818 
819   // Copy full words.
820   unsigned i;
821   for (i = 0; i != width / APINT_BITS_PER_WORD; i++)
822     Result.U.pVal[i] = U.pVal[i];
823 
824   // Truncate and copy any partial word.
825   unsigned bits = (0 - width) % APINT_BITS_PER_WORD;
826   if (bits != 0)
827     Result.U.pVal[i] = U.pVal[i] << bits >> bits;
828 
829   return Result;
830 }
831 
832 // Sign extend to a new width.
833 APInt APInt::sext(unsigned Width) const {
834   assert(Width > BitWidth && "Invalid APInt SignExtend request");
835 
836   if (Width <= APINT_BITS_PER_WORD)
837     return APInt(Width, SignExtend64(U.VAL, BitWidth));
838 
839   APInt Result(getMemory(getNumWords(Width)), Width);
840 
841   // Copy words.
842   std::memcpy(Result.U.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE);
843 
844   // Sign extend the last word since there may be unused bits in the input.
845   Result.U.pVal[getNumWords() - 1] =
846       SignExtend64(Result.U.pVal[getNumWords() - 1],
847                    ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1);
848 
849   // Fill with sign bits.
850   std::memset(Result.U.pVal + getNumWords(), isNegative() ? -1 : 0,
851               (Result.getNumWords() - getNumWords()) * APINT_WORD_SIZE);
852   Result.clearUnusedBits();
853   return Result;
854 }
855 
856 //  Zero extend to a new width.
857 APInt APInt::zext(unsigned width) const {
858   assert(width > BitWidth && "Invalid APInt ZeroExtend request");
859 
860   if (width <= APINT_BITS_PER_WORD)
861     return APInt(width, U.VAL);
862 
863   APInt Result(getMemory(getNumWords(width)), width);
864 
865   // Copy words.
866   std::memcpy(Result.U.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE);
867 
868   // Zero remaining words.
869   std::memset(Result.U.pVal + getNumWords(), 0,
870               (Result.getNumWords() - getNumWords()) * APINT_WORD_SIZE);
871 
872   return Result;
873 }
874 
875 APInt APInt::zextOrTrunc(unsigned width) const {
876   if (BitWidth < width)
877     return zext(width);
878   if (BitWidth > width)
879     return trunc(width);
880   return *this;
881 }
882 
883 APInt APInt::sextOrTrunc(unsigned width) const {
884   if (BitWidth < width)
885     return sext(width);
886   if (BitWidth > width)
887     return trunc(width);
888   return *this;
889 }
890 
891 APInt APInt::zextOrSelf(unsigned width) const {
892   if (BitWidth < width)
893     return zext(width);
894   return *this;
895 }
896 
897 APInt APInt::sextOrSelf(unsigned width) const {
898   if (BitWidth < width)
899     return sext(width);
900   return *this;
901 }
902 
903 /// Arithmetic right-shift this APInt by shiftAmt.
904 /// Arithmetic right-shift function.
905 void APInt::ashrInPlace(const APInt &shiftAmt) {
906   ashrInPlace((unsigned)shiftAmt.getLimitedValue(BitWidth));
907 }
908 
909 /// Arithmetic right-shift this APInt by shiftAmt.
910 /// Arithmetic right-shift function.
911 void APInt::ashrSlowCase(unsigned ShiftAmt) {
912   // Don't bother performing a no-op shift.
913   if (!ShiftAmt)
914     return;
915 
916   // Save the original sign bit for later.
917   bool Negative = isNegative();
918 
919   // WordShift is the inter-part shift; BitShift is intra-part shift.
920   unsigned WordShift = ShiftAmt / APINT_BITS_PER_WORD;
921   unsigned BitShift = ShiftAmt % APINT_BITS_PER_WORD;
922 
923   unsigned WordsToMove = getNumWords() - WordShift;
924   if (WordsToMove != 0) {
925     // Sign extend the last word to fill in the unused bits.
926     U.pVal[getNumWords() - 1] = SignExtend64(
927         U.pVal[getNumWords() - 1], ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1);
928 
929     // Fastpath for moving by whole words.
930     if (BitShift == 0) {
931       std::memmove(U.pVal, U.pVal + WordShift, WordsToMove * APINT_WORD_SIZE);
932     } else {
933       // Move the words containing significant bits.
934       for (unsigned i = 0; i != WordsToMove - 1; ++i)
935         U.pVal[i] = (U.pVal[i + WordShift] >> BitShift) |
936                     (U.pVal[i + WordShift + 1] << (APINT_BITS_PER_WORD - BitShift));
937 
938       // Handle the last word which has no high bits to copy.
939       U.pVal[WordsToMove - 1] = U.pVal[WordShift + WordsToMove - 1] >> BitShift;
940       // Sign extend one more time.
941       U.pVal[WordsToMove - 1] =
942           SignExtend64(U.pVal[WordsToMove - 1], APINT_BITS_PER_WORD - BitShift);
943     }
944   }
945 
946   // Fill in the remainder based on the original sign.
947   std::memset(U.pVal + WordsToMove, Negative ? -1 : 0,
948               WordShift * APINT_WORD_SIZE);
949   clearUnusedBits();
950 }
951 
952 /// Logical right-shift this APInt by shiftAmt.
953 /// Logical right-shift function.
954 void APInt::lshrInPlace(const APInt &shiftAmt) {
955   lshrInPlace((unsigned)shiftAmt.getLimitedValue(BitWidth));
956 }
957 
958 /// Logical right-shift this APInt by shiftAmt.
959 /// Logical right-shift function.
960 void APInt::lshrSlowCase(unsigned ShiftAmt) {
961   tcShiftRight(U.pVal, getNumWords(), ShiftAmt);
962 }
963 
964 /// Left-shift this APInt by shiftAmt.
965 /// Left-shift function.
966 APInt &APInt::operator<<=(const APInt &shiftAmt) {
967   // It's undefined behavior in C to shift by BitWidth or greater.
968   *this <<= (unsigned)shiftAmt.getLimitedValue(BitWidth);
969   return *this;
970 }
971 
972 void APInt::shlSlowCase(unsigned ShiftAmt) {
973   tcShiftLeft(U.pVal, getNumWords(), ShiftAmt);
974   clearUnusedBits();
975 }
976 
977 // Calculate the rotate amount modulo the bit width.
978 static unsigned rotateModulo(unsigned BitWidth, const APInt &rotateAmt) {
979   unsigned rotBitWidth = rotateAmt.getBitWidth();
980   APInt rot = rotateAmt;
981   if (rotBitWidth < BitWidth) {
982     // Extend the rotate APInt, so that the urem doesn't divide by 0.
983     // e.g. APInt(1, 32) would give APInt(1, 0).
984     rot = rotateAmt.zext(BitWidth);
985   }
986   rot = rot.urem(APInt(rot.getBitWidth(), BitWidth));
987   return rot.getLimitedValue(BitWidth);
988 }
989 
990 APInt APInt::rotl(const APInt &rotateAmt) const {
991   return rotl(rotateModulo(BitWidth, rotateAmt));
992 }
993 
994 APInt APInt::rotl(unsigned rotateAmt) const {
995   rotateAmt %= BitWidth;
996   if (rotateAmt == 0)
997     return *this;
998   return shl(rotateAmt) | lshr(BitWidth - rotateAmt);
999 }
1000 
1001 APInt APInt::rotr(const APInt &rotateAmt) const {
1002   return rotr(rotateModulo(BitWidth, rotateAmt));
1003 }
1004 
1005 APInt APInt::rotr(unsigned rotateAmt) const {
1006   rotateAmt %= BitWidth;
1007   if (rotateAmt == 0)
1008     return *this;
1009   return lshr(rotateAmt) | shl(BitWidth - rotateAmt);
1010 }
1011 
1012 // Square Root - this method computes and returns the square root of "this".
1013 // Three mechanisms are used for computation. For small values (<= 5 bits),
1014 // a table lookup is done. This gets some performance for common cases. For
1015 // values using less than 52 bits, the value is converted to double and then
1016 // the libc sqrt function is called. The result is rounded and then converted
1017 // back to a uint64_t which is then used to construct the result. Finally,
1018 // the Babylonian method for computing square roots is used.
1019 APInt APInt::sqrt() const {
1020 
1021   // Determine the magnitude of the value.
1022   unsigned magnitude = getActiveBits();
1023 
1024   // Use a fast table for some small values. This also gets rid of some
1025   // rounding errors in libc sqrt for small values.
1026   if (magnitude <= 5) {
1027     static const uint8_t results[32] = {
1028       /*     0 */ 0,
1029       /*  1- 2 */ 1, 1,
1030       /*  3- 6 */ 2, 2, 2, 2,
1031       /*  7-12 */ 3, 3, 3, 3, 3, 3,
1032       /* 13-20 */ 4, 4, 4, 4, 4, 4, 4, 4,
1033       /* 21-30 */ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1034       /*    31 */ 6
1035     };
1036     return APInt(BitWidth, results[ (isSingleWord() ? U.VAL : U.pVal[0]) ]);
1037   }
1038 
1039   // If the magnitude of the value fits in less than 52 bits (the precision of
1040   // an IEEE double precision floating point value), then we can use the
1041   // libc sqrt function which will probably use a hardware sqrt computation.
1042   // This should be faster than the algorithm below.
1043   if (magnitude < 52) {
1044     return APInt(BitWidth,
1045                  uint64_t(::round(::sqrt(double(isSingleWord() ? U.VAL
1046                                                                : U.pVal[0])))));
1047   }
1048 
1049   // Okay, all the short cuts are exhausted. We must compute it. The following
1050   // is a classical Babylonian method for computing the square root. This code
1051   // was adapted to APInt from a wikipedia article on such computations.
1052   // See http://www.wikipedia.org/ and go to the page named
1053   // Calculate_an_integer_square_root.
1054   unsigned nbits = BitWidth, i = 4;
1055   APInt testy(BitWidth, 16);
1056   APInt x_old(BitWidth, 1);
1057   APInt x_new(BitWidth, 0);
1058   APInt two(BitWidth, 2);
1059 
1060   // Select a good starting value using binary logarithms.
1061   for (;; i += 2, testy = testy.shl(2))
1062     if (i >= nbits || this->ule(testy)) {
1063       x_old = x_old.shl(i / 2);
1064       break;
1065     }
1066 
1067   // Use the Babylonian method to arrive at the integer square root:
1068   for (;;) {
1069     x_new = (this->udiv(x_old) + x_old).udiv(two);
1070     if (x_old.ule(x_new))
1071       break;
1072     x_old = x_new;
1073   }
1074 
1075   // Make sure we return the closest approximation
1076   // NOTE: The rounding calculation below is correct. It will produce an
1077   // off-by-one discrepancy with results from pari/gp. That discrepancy has been
1078   // determined to be a rounding issue with pari/gp as it begins to use a
1079   // floating point representation after 192 bits. There are no discrepancies
1080   // between this algorithm and pari/gp for bit widths < 192 bits.
1081   APInt square(x_old * x_old);
1082   APInt nextSquare((x_old + 1) * (x_old +1));
1083   if (this->ult(square))
1084     return x_old;
1085   assert(this->ule(nextSquare) && "Error in APInt::sqrt computation");
1086   APInt midpoint((nextSquare - square).udiv(two));
1087   APInt offset(*this - square);
1088   if (offset.ult(midpoint))
1089     return x_old;
1090   return x_old + 1;
1091 }
1092 
1093 /// Computes the multiplicative inverse of this APInt for a given modulo. The
1094 /// iterative extended Euclidean algorithm is used to solve for this value,
1095 /// however we simplify it to speed up calculating only the inverse, and take
1096 /// advantage of div+rem calculations. We also use some tricks to avoid copying
1097 /// (potentially large) APInts around.
1098 APInt APInt::multiplicativeInverse(const APInt& modulo) const {
1099   assert(ult(modulo) && "This APInt must be smaller than the modulo");
1100 
1101   // Using the properties listed at the following web page (accessed 06/21/08):
1102   //   http://www.numbertheory.org/php/euclid.html
1103   // (especially the properties numbered 3, 4 and 9) it can be proved that
1104   // BitWidth bits suffice for all the computations in the algorithm implemented
1105   // below. More precisely, this number of bits suffice if the multiplicative
1106   // inverse exists, but may not suffice for the general extended Euclidean
1107   // algorithm.
1108 
1109   APInt r[2] = { modulo, *this };
1110   APInt t[2] = { APInt(BitWidth, 0), APInt(BitWidth, 1) };
1111   APInt q(BitWidth, 0);
1112 
1113   unsigned i;
1114   for (i = 0; r[i^1] != 0; i ^= 1) {
1115     // An overview of the math without the confusing bit-flipping:
1116     // q = r[i-2] / r[i-1]
1117     // r[i] = r[i-2] % r[i-1]
1118     // t[i] = t[i-2] - t[i-1] * q
1119     udivrem(r[i], r[i^1], q, r[i]);
1120     t[i] -= t[i^1] * q;
1121   }
1122 
1123   // If this APInt and the modulo are not coprime, there is no multiplicative
1124   // inverse, so return 0. We check this by looking at the next-to-last
1125   // remainder, which is the gcd(*this,modulo) as calculated by the Euclidean
1126   // algorithm.
1127   if (r[i] != 1)
1128     return APInt(BitWidth, 0);
1129 
1130   // The next-to-last t is the multiplicative inverse.  However, we are
1131   // interested in a positive inverse. Calculate a positive one from a negative
1132   // one if necessary. A simple addition of the modulo suffices because
1133   // abs(t[i]) is known to be less than *this/2 (see the link above).
1134   if (t[i].isNegative())
1135     t[i] += modulo;
1136 
1137   return std::move(t[i]);
1138 }
1139 
1140 /// Calculate the magic numbers required to implement a signed integer division
1141 /// by a constant as a sequence of multiplies, adds and shifts.  Requires that
1142 /// the divisor not be 0, 1, or -1.  Taken from "Hacker's Delight", Henry S.
1143 /// Warren, Jr., chapter 10.
1144 APInt::ms APInt::magic() const {
1145   const APInt& d = *this;
1146   unsigned p;
1147   APInt ad, anc, delta, q1, r1, q2, r2, t;
1148   APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
1149   struct ms mag;
1150 
1151   ad = d.abs();
1152   t = signedMin + (d.lshr(d.getBitWidth() - 1));
1153   anc = t - 1 - t.urem(ad);   // absolute value of nc
1154   p = d.getBitWidth() - 1;    // initialize p
1155   q1 = signedMin.udiv(anc);   // initialize q1 = 2p/abs(nc)
1156   r1 = signedMin - q1*anc;    // initialize r1 = rem(2p,abs(nc))
1157   q2 = signedMin.udiv(ad);    // initialize q2 = 2p/abs(d)
1158   r2 = signedMin - q2*ad;     // initialize r2 = rem(2p,abs(d))
1159   do {
1160     p = p + 1;
1161     q1 = q1<<1;          // update q1 = 2p/abs(nc)
1162     r1 = r1<<1;          // update r1 = rem(2p/abs(nc))
1163     if (r1.uge(anc)) {  // must be unsigned comparison
1164       q1 = q1 + 1;
1165       r1 = r1 - anc;
1166     }
1167     q2 = q2<<1;          // update q2 = 2p/abs(d)
1168     r2 = r2<<1;          // update r2 = rem(2p/abs(d))
1169     if (r2.uge(ad)) {   // must be unsigned comparison
1170       q2 = q2 + 1;
1171       r2 = r2 - ad;
1172     }
1173     delta = ad - r2;
1174   } while (q1.ult(delta) || (q1 == delta && r1 == 0));
1175 
1176   mag.m = q2 + 1;
1177   if (d.isNegative()) mag.m = -mag.m;   // resulting magic number
1178   mag.s = p - d.getBitWidth();          // resulting shift
1179   return mag;
1180 }
1181 
1182 /// Calculate the magic numbers required to implement an unsigned integer
1183 /// division by a constant as a sequence of multiplies, adds and shifts.
1184 /// Requires that the divisor not be 0.  Taken from "Hacker's Delight", Henry
1185 /// S. Warren, Jr., chapter 10.
1186 /// LeadingZeros can be used to simplify the calculation if the upper bits
1187 /// of the divided value are known zero.
1188 APInt::mu APInt::magicu(unsigned LeadingZeros) const {
1189   const APInt& d = *this;
1190   unsigned p;
1191   APInt nc, delta, q1, r1, q2, r2;
1192   struct mu magu;
1193   magu.a = 0;               // initialize "add" indicator
1194   APInt allOnes = APInt::getAllOnesValue(d.getBitWidth()).lshr(LeadingZeros);
1195   APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
1196   APInt signedMax = APInt::getSignedMaxValue(d.getBitWidth());
1197 
1198   nc = allOnes - (allOnes - d).urem(d);
1199   p = d.getBitWidth() - 1;  // initialize p
1200   q1 = signedMin.udiv(nc);  // initialize q1 = 2p/nc
1201   r1 = signedMin - q1*nc;   // initialize r1 = rem(2p,nc)
1202   q2 = signedMax.udiv(d);   // initialize q2 = (2p-1)/d
1203   r2 = signedMax - q2*d;    // initialize r2 = rem((2p-1),d)
1204   do {
1205     p = p + 1;
1206     if (r1.uge(nc - r1)) {
1207       q1 = q1 + q1 + 1;  // update q1
1208       r1 = r1 + r1 - nc; // update r1
1209     }
1210     else {
1211       q1 = q1+q1; // update q1
1212       r1 = r1+r1; // update r1
1213     }
1214     if ((r2 + 1).uge(d - r2)) {
1215       if (q2.uge(signedMax)) magu.a = 1;
1216       q2 = q2+q2 + 1;     // update q2
1217       r2 = r2+r2 + 1 - d; // update r2
1218     }
1219     else {
1220       if (q2.uge(signedMin)) magu.a = 1;
1221       q2 = q2+q2;     // update q2
1222       r2 = r2+r2 + 1; // update r2
1223     }
1224     delta = d - 1 - r2;
1225   } while (p < d.getBitWidth()*2 &&
1226            (q1.ult(delta) || (q1 == delta && r1 == 0)));
1227   magu.m = q2 + 1; // resulting magic number
1228   magu.s = p - d.getBitWidth();  // resulting shift
1229   return magu;
1230 }
1231 
1232 /// Implementation of Knuth's Algorithm D (Division of nonnegative integers)
1233 /// from "Art of Computer Programming, Volume 2", section 4.3.1, p. 272. The
1234 /// variables here have the same names as in the algorithm. Comments explain
1235 /// the algorithm and any deviation from it.
1236 static void KnuthDiv(uint32_t *u, uint32_t *v, uint32_t *q, uint32_t* r,
1237                      unsigned m, unsigned n) {
1238   assert(u && "Must provide dividend");
1239   assert(v && "Must provide divisor");
1240   assert(q && "Must provide quotient");
1241   assert(u != v && u != q && v != q && "Must use different memory");
1242   assert(n>1 && "n must be > 1");
1243 
1244   // b denotes the base of the number system. In our case b is 2^32.
1245   const uint64_t b = uint64_t(1) << 32;
1246 
1247 // The DEBUG macros here tend to be spam in the debug output if you're not
1248 // debugging this code. Disable them unless KNUTH_DEBUG is defined.
1249 #ifdef KNUTH_DEBUG
1250 #define DEBUG_KNUTH(X) LLVM_DEBUG(X)
1251 #else
1252 #define DEBUG_KNUTH(X) do {} while(false)
1253 #endif
1254 
1255   DEBUG_KNUTH(dbgs() << "KnuthDiv: m=" << m << " n=" << n << '\n');
1256   DEBUG_KNUTH(dbgs() << "KnuthDiv: original:");
1257   DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1258   DEBUG_KNUTH(dbgs() << " by");
1259   DEBUG_KNUTH(for (int i = n; i > 0; i--) dbgs() << " " << v[i - 1]);
1260   DEBUG_KNUTH(dbgs() << '\n');
1261   // D1. [Normalize.] Set d = b / (v[n-1] + 1) and multiply all the digits of
1262   // u and v by d. Note that we have taken Knuth's advice here to use a power
1263   // of 2 value for d such that d * v[n-1] >= b/2 (b is the base). A power of
1264   // 2 allows us to shift instead of multiply and it is easy to determine the
1265   // shift amount from the leading zeros.  We are basically normalizing the u
1266   // and v so that its high bits are shifted to the top of v's range without
1267   // overflow. Note that this can require an extra word in u so that u must
1268   // be of length m+n+1.
1269   unsigned shift = countLeadingZeros(v[n-1]);
1270   uint32_t v_carry = 0;
1271   uint32_t u_carry = 0;
1272   if (shift) {
1273     for (unsigned i = 0; i < m+n; ++i) {
1274       uint32_t u_tmp = u[i] >> (32 - shift);
1275       u[i] = (u[i] << shift) | u_carry;
1276       u_carry = u_tmp;
1277     }
1278     for (unsigned i = 0; i < n; ++i) {
1279       uint32_t v_tmp = v[i] >> (32 - shift);
1280       v[i] = (v[i] << shift) | v_carry;
1281       v_carry = v_tmp;
1282     }
1283   }
1284   u[m+n] = u_carry;
1285 
1286   DEBUG_KNUTH(dbgs() << "KnuthDiv:   normal:");
1287   DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1288   DEBUG_KNUTH(dbgs() << " by");
1289   DEBUG_KNUTH(for (int i = n; i > 0; i--) dbgs() << " " << v[i - 1]);
1290   DEBUG_KNUTH(dbgs() << '\n');
1291 
1292   // D2. [Initialize j.]  Set j to m. This is the loop counter over the places.
1293   int j = m;
1294   do {
1295     DEBUG_KNUTH(dbgs() << "KnuthDiv: quotient digit #" << j << '\n');
1296     // D3. [Calculate q'.].
1297     //     Set qp = (u[j+n]*b + u[j+n-1]) / v[n-1]. (qp=qprime=q')
1298     //     Set rp = (u[j+n]*b + u[j+n-1]) % v[n-1]. (rp=rprime=r')
1299     // Now test if qp == b or qp*v[n-2] > b*rp + u[j+n-2]; if so, decrease
1300     // qp by 1, increase rp by v[n-1], and repeat this test if rp < b. The test
1301     // on v[n-2] determines at high speed most of the cases in which the trial
1302     // value qp is one too large, and it eliminates all cases where qp is two
1303     // too large.
1304     uint64_t dividend = Make_64(u[j+n], u[j+n-1]);
1305     DEBUG_KNUTH(dbgs() << "KnuthDiv: dividend == " << dividend << '\n');
1306     uint64_t qp = dividend / v[n-1];
1307     uint64_t rp = dividend % v[n-1];
1308     if (qp == b || qp*v[n-2] > b*rp + u[j+n-2]) {
1309       qp--;
1310       rp += v[n-1];
1311       if (rp < b && (qp == b || qp*v[n-2] > b*rp + u[j+n-2]))
1312         qp--;
1313     }
1314     DEBUG_KNUTH(dbgs() << "KnuthDiv: qp == " << qp << ", rp == " << rp << '\n');
1315 
1316     // D4. [Multiply and subtract.] Replace (u[j+n]u[j+n-1]...u[j]) with
1317     // (u[j+n]u[j+n-1]..u[j]) - qp * (v[n-1]...v[1]v[0]). This computation
1318     // consists of a simple multiplication by a one-place number, combined with
1319     // a subtraction.
1320     // The digits (u[j+n]...u[j]) should be kept positive; if the result of
1321     // this step is actually negative, (u[j+n]...u[j]) should be left as the
1322     // true value plus b**(n+1), namely as the b's complement of
1323     // the true value, and a "borrow" to the left should be remembered.
1324     int64_t borrow = 0;
1325     for (unsigned i = 0; i < n; ++i) {
1326       uint64_t p = uint64_t(qp) * uint64_t(v[i]);
1327       int64_t subres = int64_t(u[j+i]) - borrow - Lo_32(p);
1328       u[j+i] = Lo_32(subres);
1329       borrow = Hi_32(p) - Hi_32(subres);
1330       DEBUG_KNUTH(dbgs() << "KnuthDiv: u[j+i] = " << u[j + i]
1331                         << ", borrow = " << borrow << '\n');
1332     }
1333     bool isNeg = u[j+n] < borrow;
1334     u[j+n] -= Lo_32(borrow);
1335 
1336     DEBUG_KNUTH(dbgs() << "KnuthDiv: after subtraction:");
1337     DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1338     DEBUG_KNUTH(dbgs() << '\n');
1339 
1340     // D5. [Test remainder.] Set q[j] = qp. If the result of step D4 was
1341     // negative, go to step D6; otherwise go on to step D7.
1342     q[j] = Lo_32(qp);
1343     if (isNeg) {
1344       // D6. [Add back]. The probability that this step is necessary is very
1345       // small, on the order of only 2/b. Make sure that test data accounts for
1346       // this possibility. Decrease q[j] by 1
1347       q[j]--;
1348       // and add (0v[n-1]...v[1]v[0]) to (u[j+n]u[j+n-1]...u[j+1]u[j]).
1349       // A carry will occur to the left of u[j+n], and it should be ignored
1350       // since it cancels with the borrow that occurred in D4.
1351       bool carry = false;
1352       for (unsigned i = 0; i < n; i++) {
1353         uint32_t limit = std::min(u[j+i],v[i]);
1354         u[j+i] += v[i] + carry;
1355         carry = u[j+i] < limit || (carry && u[j+i] == limit);
1356       }
1357       u[j+n] += carry;
1358     }
1359     DEBUG_KNUTH(dbgs() << "KnuthDiv: after correction:");
1360     DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1361     DEBUG_KNUTH(dbgs() << "\nKnuthDiv: digit result = " << q[j] << '\n');
1362 
1363     // D7. [Loop on j.]  Decrease j by one. Now if j >= 0, go back to D3.
1364   } while (--j >= 0);
1365 
1366   DEBUG_KNUTH(dbgs() << "KnuthDiv: quotient:");
1367   DEBUG_KNUTH(for (int i = m; i >= 0; i--) dbgs() << " " << q[i]);
1368   DEBUG_KNUTH(dbgs() << '\n');
1369 
1370   // D8. [Unnormalize]. Now q[...] is the desired quotient, and the desired
1371   // remainder may be obtained by dividing u[...] by d. If r is non-null we
1372   // compute the remainder (urem uses this).
1373   if (r) {
1374     // The value d is expressed by the "shift" value above since we avoided
1375     // multiplication by d by using a shift left. So, all we have to do is
1376     // shift right here.
1377     if (shift) {
1378       uint32_t carry = 0;
1379       DEBUG_KNUTH(dbgs() << "KnuthDiv: remainder:");
1380       for (int i = n-1; i >= 0; i--) {
1381         r[i] = (u[i] >> shift) | carry;
1382         carry = u[i] << (32 - shift);
1383         DEBUG_KNUTH(dbgs() << " " << r[i]);
1384       }
1385     } else {
1386       for (int i = n-1; i >= 0; i--) {
1387         r[i] = u[i];
1388         DEBUG_KNUTH(dbgs() << " " << r[i]);
1389       }
1390     }
1391     DEBUG_KNUTH(dbgs() << '\n');
1392   }
1393   DEBUG_KNUTH(dbgs() << '\n');
1394 }
1395 
1396 void APInt::divide(const WordType *LHS, unsigned lhsWords, const WordType *RHS,
1397                    unsigned rhsWords, WordType *Quotient, WordType *Remainder) {
1398   assert(lhsWords >= rhsWords && "Fractional result");
1399 
1400   // First, compose the values into an array of 32-bit words instead of
1401   // 64-bit words. This is a necessity of both the "short division" algorithm
1402   // and the Knuth "classical algorithm" which requires there to be native
1403   // operations for +, -, and * on an m bit value with an m*2 bit result. We
1404   // can't use 64-bit operands here because we don't have native results of
1405   // 128-bits. Furthermore, casting the 64-bit values to 32-bit values won't
1406   // work on large-endian machines.
1407   unsigned n = rhsWords * 2;
1408   unsigned m = (lhsWords * 2) - n;
1409 
1410   // Allocate space for the temporary values we need either on the stack, if
1411   // it will fit, or on the heap if it won't.
1412   uint32_t SPACE[128];
1413   uint32_t *U = nullptr;
1414   uint32_t *V = nullptr;
1415   uint32_t *Q = nullptr;
1416   uint32_t *R = nullptr;
1417   if ((Remainder?4:3)*n+2*m+1 <= 128) {
1418     U = &SPACE[0];
1419     V = &SPACE[m+n+1];
1420     Q = &SPACE[(m+n+1) + n];
1421     if (Remainder)
1422       R = &SPACE[(m+n+1) + n + (m+n)];
1423   } else {
1424     U = new uint32_t[m + n + 1];
1425     V = new uint32_t[n];
1426     Q = new uint32_t[m+n];
1427     if (Remainder)
1428       R = new uint32_t[n];
1429   }
1430 
1431   // Initialize the dividend
1432   memset(U, 0, (m+n+1)*sizeof(uint32_t));
1433   for (unsigned i = 0; i < lhsWords; ++i) {
1434     uint64_t tmp = LHS[i];
1435     U[i * 2] = Lo_32(tmp);
1436     U[i * 2 + 1] = Hi_32(tmp);
1437   }
1438   U[m+n] = 0; // this extra word is for "spill" in the Knuth algorithm.
1439 
1440   // Initialize the divisor
1441   memset(V, 0, (n)*sizeof(uint32_t));
1442   for (unsigned i = 0; i < rhsWords; ++i) {
1443     uint64_t tmp = RHS[i];
1444     V[i * 2] = Lo_32(tmp);
1445     V[i * 2 + 1] = Hi_32(tmp);
1446   }
1447 
1448   // initialize the quotient and remainder
1449   memset(Q, 0, (m+n) * sizeof(uint32_t));
1450   if (Remainder)
1451     memset(R, 0, n * sizeof(uint32_t));
1452 
1453   // Now, adjust m and n for the Knuth division. n is the number of words in
1454   // the divisor. m is the number of words by which the dividend exceeds the
1455   // divisor (i.e. m+n is the length of the dividend). These sizes must not
1456   // contain any zero words or the Knuth algorithm fails.
1457   for (unsigned i = n; i > 0 && V[i-1] == 0; i--) {
1458     n--;
1459     m++;
1460   }
1461   for (unsigned i = m+n; i > 0 && U[i-1] == 0; i--)
1462     m--;
1463 
1464   // If we're left with only a single word for the divisor, Knuth doesn't work
1465   // so we implement the short division algorithm here. This is much simpler
1466   // and faster because we are certain that we can divide a 64-bit quantity
1467   // by a 32-bit quantity at hardware speed and short division is simply a
1468   // series of such operations. This is just like doing short division but we
1469   // are using base 2^32 instead of base 10.
1470   assert(n != 0 && "Divide by zero?");
1471   if (n == 1) {
1472     uint32_t divisor = V[0];
1473     uint32_t remainder = 0;
1474     for (int i = m; i >= 0; i--) {
1475       uint64_t partial_dividend = Make_64(remainder, U[i]);
1476       if (partial_dividend == 0) {
1477         Q[i] = 0;
1478         remainder = 0;
1479       } else if (partial_dividend < divisor) {
1480         Q[i] = 0;
1481         remainder = Lo_32(partial_dividend);
1482       } else if (partial_dividend == divisor) {
1483         Q[i] = 1;
1484         remainder = 0;
1485       } else {
1486         Q[i] = Lo_32(partial_dividend / divisor);
1487         remainder = Lo_32(partial_dividend - (Q[i] * divisor));
1488       }
1489     }
1490     if (R)
1491       R[0] = remainder;
1492   } else {
1493     // Now we're ready to invoke the Knuth classical divide algorithm. In this
1494     // case n > 1.
1495     KnuthDiv(U, V, Q, R, m, n);
1496   }
1497 
1498   // If the caller wants the quotient
1499   if (Quotient) {
1500     for (unsigned i = 0; i < lhsWords; ++i)
1501       Quotient[i] = Make_64(Q[i*2+1], Q[i*2]);
1502   }
1503 
1504   // If the caller wants the remainder
1505   if (Remainder) {
1506     for (unsigned i = 0; i < rhsWords; ++i)
1507       Remainder[i] = Make_64(R[i*2+1], R[i*2]);
1508   }
1509 
1510   // Clean up the memory we allocated.
1511   if (U != &SPACE[0]) {
1512     delete [] U;
1513     delete [] V;
1514     delete [] Q;
1515     delete [] R;
1516   }
1517 }
1518 
1519 APInt APInt::udiv(const APInt &RHS) const {
1520   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
1521 
1522   // First, deal with the easy case
1523   if (isSingleWord()) {
1524     assert(RHS.U.VAL != 0 && "Divide by zero?");
1525     return APInt(BitWidth, U.VAL / RHS.U.VAL);
1526   }
1527 
1528   // Get some facts about the LHS and RHS number of bits and words
1529   unsigned lhsWords = getNumWords(getActiveBits());
1530   unsigned rhsBits  = RHS.getActiveBits();
1531   unsigned rhsWords = getNumWords(rhsBits);
1532   assert(rhsWords && "Divided by zero???");
1533 
1534   // Deal with some degenerate cases
1535   if (!lhsWords)
1536     // 0 / X ===> 0
1537     return APInt(BitWidth, 0);
1538   if (rhsBits == 1)
1539     // X / 1 ===> X
1540     return *this;
1541   if (lhsWords < rhsWords || this->ult(RHS))
1542     // X / Y ===> 0, iff X < Y
1543     return APInt(BitWidth, 0);
1544   if (*this == RHS)
1545     // X / X ===> 1
1546     return APInt(BitWidth, 1);
1547   if (lhsWords == 1) // rhsWords is 1 if lhsWords is 1.
1548     // All high words are zero, just use native divide
1549     return APInt(BitWidth, this->U.pVal[0] / RHS.U.pVal[0]);
1550 
1551   // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1552   APInt Quotient(BitWidth, 0); // to hold result.
1553   divide(U.pVal, lhsWords, RHS.U.pVal, rhsWords, Quotient.U.pVal, nullptr);
1554   return Quotient;
1555 }
1556 
1557 APInt APInt::udiv(uint64_t RHS) const {
1558   assert(RHS != 0 && "Divide by zero?");
1559 
1560   // First, deal with the easy case
1561   if (isSingleWord())
1562     return APInt(BitWidth, U.VAL / RHS);
1563 
1564   // Get some facts about the LHS words.
1565   unsigned lhsWords = getNumWords(getActiveBits());
1566 
1567   // Deal with some degenerate cases
1568   if (!lhsWords)
1569     // 0 / X ===> 0
1570     return APInt(BitWidth, 0);
1571   if (RHS == 1)
1572     // X / 1 ===> X
1573     return *this;
1574   if (this->ult(RHS))
1575     // X / Y ===> 0, iff X < Y
1576     return APInt(BitWidth, 0);
1577   if (*this == RHS)
1578     // X / X ===> 1
1579     return APInt(BitWidth, 1);
1580   if (lhsWords == 1) // rhsWords is 1 if lhsWords is 1.
1581     // All high words are zero, just use native divide
1582     return APInt(BitWidth, this->U.pVal[0] / RHS);
1583 
1584   // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1585   APInt Quotient(BitWidth, 0); // to hold result.
1586   divide(U.pVal, lhsWords, &RHS, 1, Quotient.U.pVal, nullptr);
1587   return Quotient;
1588 }
1589 
1590 APInt APInt::sdiv(const APInt &RHS) const {
1591   if (isNegative()) {
1592     if (RHS.isNegative())
1593       return (-(*this)).udiv(-RHS);
1594     return -((-(*this)).udiv(RHS));
1595   }
1596   if (RHS.isNegative())
1597     return -(this->udiv(-RHS));
1598   return this->udiv(RHS);
1599 }
1600 
1601 APInt APInt::sdiv(int64_t RHS) const {
1602   if (isNegative()) {
1603     if (RHS < 0)
1604       return (-(*this)).udiv(-RHS);
1605     return -((-(*this)).udiv(RHS));
1606   }
1607   if (RHS < 0)
1608     return -(this->udiv(-RHS));
1609   return this->udiv(RHS);
1610 }
1611 
1612 APInt APInt::urem(const APInt &RHS) const {
1613   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
1614   if (isSingleWord()) {
1615     assert(RHS.U.VAL != 0 && "Remainder by zero?");
1616     return APInt(BitWidth, U.VAL % RHS.U.VAL);
1617   }
1618 
1619   // Get some facts about the LHS
1620   unsigned lhsWords = getNumWords(getActiveBits());
1621 
1622   // Get some facts about the RHS
1623   unsigned rhsBits = RHS.getActiveBits();
1624   unsigned rhsWords = getNumWords(rhsBits);
1625   assert(rhsWords && "Performing remainder operation by zero ???");
1626 
1627   // Check the degenerate cases
1628   if (lhsWords == 0)
1629     // 0 % Y ===> 0
1630     return APInt(BitWidth, 0);
1631   if (rhsBits == 1)
1632     // X % 1 ===> 0
1633     return APInt(BitWidth, 0);
1634   if (lhsWords < rhsWords || this->ult(RHS))
1635     // X % Y ===> X, iff X < Y
1636     return *this;
1637   if (*this == RHS)
1638     // X % X == 0;
1639     return APInt(BitWidth, 0);
1640   if (lhsWords == 1)
1641     // All high words are zero, just use native remainder
1642     return APInt(BitWidth, U.pVal[0] % RHS.U.pVal[0]);
1643 
1644   // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1645   APInt Remainder(BitWidth, 0);
1646   divide(U.pVal, lhsWords, RHS.U.pVal, rhsWords, nullptr, Remainder.U.pVal);
1647   return Remainder;
1648 }
1649 
1650 uint64_t APInt::urem(uint64_t RHS) const {
1651   assert(RHS != 0 && "Remainder by zero?");
1652 
1653   if (isSingleWord())
1654     return U.VAL % RHS;
1655 
1656   // Get some facts about the LHS
1657   unsigned lhsWords = getNumWords(getActiveBits());
1658 
1659   // Check the degenerate cases
1660   if (lhsWords == 0)
1661     // 0 % Y ===> 0
1662     return 0;
1663   if (RHS == 1)
1664     // X % 1 ===> 0
1665     return 0;
1666   if (this->ult(RHS))
1667     // X % Y ===> X, iff X < Y
1668     return getZExtValue();
1669   if (*this == RHS)
1670     // X % X == 0;
1671     return 0;
1672   if (lhsWords == 1)
1673     // All high words are zero, just use native remainder
1674     return U.pVal[0] % RHS;
1675 
1676   // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1677   uint64_t Remainder;
1678   divide(U.pVal, lhsWords, &RHS, 1, nullptr, &Remainder);
1679   return Remainder;
1680 }
1681 
1682 APInt APInt::srem(const APInt &RHS) const {
1683   if (isNegative()) {
1684     if (RHS.isNegative())
1685       return -((-(*this)).urem(-RHS));
1686     return -((-(*this)).urem(RHS));
1687   }
1688   if (RHS.isNegative())
1689     return this->urem(-RHS);
1690   return this->urem(RHS);
1691 }
1692 
1693 int64_t APInt::srem(int64_t RHS) const {
1694   if (isNegative()) {
1695     if (RHS < 0)
1696       return -((-(*this)).urem(-RHS));
1697     return -((-(*this)).urem(RHS));
1698   }
1699   if (RHS < 0)
1700     return this->urem(-RHS);
1701   return this->urem(RHS);
1702 }
1703 
1704 void APInt::udivrem(const APInt &LHS, const APInt &RHS,
1705                     APInt &Quotient, APInt &Remainder) {
1706   assert(LHS.BitWidth == RHS.BitWidth && "Bit widths must be the same");
1707   unsigned BitWidth = LHS.BitWidth;
1708 
1709   // First, deal with the easy case
1710   if (LHS.isSingleWord()) {
1711     assert(RHS.U.VAL != 0 && "Divide by zero?");
1712     uint64_t QuotVal = LHS.U.VAL / RHS.U.VAL;
1713     uint64_t RemVal = LHS.U.VAL % RHS.U.VAL;
1714     Quotient = APInt(BitWidth, QuotVal);
1715     Remainder = APInt(BitWidth, RemVal);
1716     return;
1717   }
1718 
1719   // Get some size facts about the dividend and divisor
1720   unsigned lhsWords = getNumWords(LHS.getActiveBits());
1721   unsigned rhsBits  = RHS.getActiveBits();
1722   unsigned rhsWords = getNumWords(rhsBits);
1723   assert(rhsWords && "Performing divrem operation by zero ???");
1724 
1725   // Check the degenerate cases
1726   if (lhsWords == 0) {
1727     Quotient = APInt(BitWidth, 0);    // 0 / Y ===> 0
1728     Remainder = APInt(BitWidth, 0);   // 0 % Y ===> 0
1729     return;
1730   }
1731 
1732   if (rhsBits == 1) {
1733     Quotient = LHS;                   // X / 1 ===> X
1734     Remainder = APInt(BitWidth, 0);   // X % 1 ===> 0
1735   }
1736 
1737   if (lhsWords < rhsWords || LHS.ult(RHS)) {
1738     Remainder = LHS;                  // X % Y ===> X, iff X < Y
1739     Quotient = APInt(BitWidth, 0);    // X / Y ===> 0, iff X < Y
1740     return;
1741   }
1742 
1743   if (LHS == RHS) {
1744     Quotient  = APInt(BitWidth, 1);   // X / X ===> 1
1745     Remainder = APInt(BitWidth, 0);   // X % X ===> 0;
1746     return;
1747   }
1748 
1749   // Make sure there is enough space to hold the results.
1750   // NOTE: This assumes that reallocate won't affect any bits if it doesn't
1751   // change the size. This is necessary if Quotient or Remainder is aliased
1752   // with LHS or RHS.
1753   Quotient.reallocate(BitWidth);
1754   Remainder.reallocate(BitWidth);
1755 
1756   if (lhsWords == 1) { // rhsWords is 1 if lhsWords is 1.
1757     // There is only one word to consider so use the native versions.
1758     uint64_t lhsValue = LHS.U.pVal[0];
1759     uint64_t rhsValue = RHS.U.pVal[0];
1760     Quotient = lhsValue / rhsValue;
1761     Remainder = lhsValue % rhsValue;
1762     return;
1763   }
1764 
1765   // Okay, lets do it the long way
1766   divide(LHS.U.pVal, lhsWords, RHS.U.pVal, rhsWords, Quotient.U.pVal,
1767          Remainder.U.pVal);
1768   // Clear the rest of the Quotient and Remainder.
1769   std::memset(Quotient.U.pVal + lhsWords, 0,
1770               (getNumWords(BitWidth) - lhsWords) * APINT_WORD_SIZE);
1771   std::memset(Remainder.U.pVal + rhsWords, 0,
1772               (getNumWords(BitWidth) - rhsWords) * APINT_WORD_SIZE);
1773 }
1774 
1775 void APInt::udivrem(const APInt &LHS, uint64_t RHS, APInt &Quotient,
1776                     uint64_t &Remainder) {
1777   assert(RHS != 0 && "Divide by zero?");
1778   unsigned BitWidth = LHS.BitWidth;
1779 
1780   // First, deal with the easy case
1781   if (LHS.isSingleWord()) {
1782     uint64_t QuotVal = LHS.U.VAL / RHS;
1783     Remainder = LHS.U.VAL % RHS;
1784     Quotient = APInt(BitWidth, QuotVal);
1785     return;
1786   }
1787 
1788   // Get some size facts about the dividend and divisor
1789   unsigned lhsWords = getNumWords(LHS.getActiveBits());
1790 
1791   // Check the degenerate cases
1792   if (lhsWords == 0) {
1793     Quotient = APInt(BitWidth, 0);    // 0 / Y ===> 0
1794     Remainder = 0;                    // 0 % Y ===> 0
1795     return;
1796   }
1797 
1798   if (RHS == 1) {
1799     Quotient = LHS;                   // X / 1 ===> X
1800     Remainder = 0;                    // X % 1 ===> 0
1801     return;
1802   }
1803 
1804   if (LHS.ult(RHS)) {
1805     Remainder = LHS.getZExtValue();   // X % Y ===> X, iff X < Y
1806     Quotient = APInt(BitWidth, 0);    // X / Y ===> 0, iff X < Y
1807     return;
1808   }
1809 
1810   if (LHS == RHS) {
1811     Quotient  = APInt(BitWidth, 1);   // X / X ===> 1
1812     Remainder = 0;                    // X % X ===> 0;
1813     return;
1814   }
1815 
1816   // Make sure there is enough space to hold the results.
1817   // NOTE: This assumes that reallocate won't affect any bits if it doesn't
1818   // change the size. This is necessary if Quotient is aliased with LHS.
1819   Quotient.reallocate(BitWidth);
1820 
1821   if (lhsWords == 1) { // rhsWords is 1 if lhsWords is 1.
1822     // There is only one word to consider so use the native versions.
1823     uint64_t lhsValue = LHS.U.pVal[0];
1824     Quotient = lhsValue / RHS;
1825     Remainder = lhsValue % RHS;
1826     return;
1827   }
1828 
1829   // Okay, lets do it the long way
1830   divide(LHS.U.pVal, lhsWords, &RHS, 1, Quotient.U.pVal, &Remainder);
1831   // Clear the rest of the Quotient.
1832   std::memset(Quotient.U.pVal + lhsWords, 0,
1833               (getNumWords(BitWidth) - lhsWords) * APINT_WORD_SIZE);
1834 }
1835 
1836 void APInt::sdivrem(const APInt &LHS, const APInt &RHS,
1837                     APInt &Quotient, APInt &Remainder) {
1838   if (LHS.isNegative()) {
1839     if (RHS.isNegative())
1840       APInt::udivrem(-LHS, -RHS, Quotient, Remainder);
1841     else {
1842       APInt::udivrem(-LHS, RHS, Quotient, Remainder);
1843       Quotient.negate();
1844     }
1845     Remainder.negate();
1846   } else if (RHS.isNegative()) {
1847     APInt::udivrem(LHS, -RHS, Quotient, Remainder);
1848     Quotient.negate();
1849   } else {
1850     APInt::udivrem(LHS, RHS, Quotient, Remainder);
1851   }
1852 }
1853 
1854 void APInt::sdivrem(const APInt &LHS, int64_t RHS,
1855                     APInt &Quotient, int64_t &Remainder) {
1856   uint64_t R = Remainder;
1857   if (LHS.isNegative()) {
1858     if (RHS < 0)
1859       APInt::udivrem(-LHS, -RHS, Quotient, R);
1860     else {
1861       APInt::udivrem(-LHS, RHS, Quotient, R);
1862       Quotient.negate();
1863     }
1864     R = -R;
1865   } else if (RHS < 0) {
1866     APInt::udivrem(LHS, -RHS, Quotient, R);
1867     Quotient.negate();
1868   } else {
1869     APInt::udivrem(LHS, RHS, Quotient, R);
1870   }
1871   Remainder = R;
1872 }
1873 
1874 APInt APInt::sadd_ov(const APInt &RHS, bool &Overflow) const {
1875   APInt Res = *this+RHS;
1876   Overflow = isNonNegative() == RHS.isNonNegative() &&
1877              Res.isNonNegative() != isNonNegative();
1878   return Res;
1879 }
1880 
1881 APInt APInt::uadd_ov(const APInt &RHS, bool &Overflow) const {
1882   APInt Res = *this+RHS;
1883   Overflow = Res.ult(RHS);
1884   return Res;
1885 }
1886 
1887 APInt APInt::ssub_ov(const APInt &RHS, bool &Overflow) const {
1888   APInt Res = *this - RHS;
1889   Overflow = isNonNegative() != RHS.isNonNegative() &&
1890              Res.isNonNegative() != isNonNegative();
1891   return Res;
1892 }
1893 
1894 APInt APInt::usub_ov(const APInt &RHS, bool &Overflow) const {
1895   APInt Res = *this-RHS;
1896   Overflow = Res.ugt(*this);
1897   return Res;
1898 }
1899 
1900 APInt APInt::sdiv_ov(const APInt &RHS, bool &Overflow) const {
1901   // MININT/-1  -->  overflow.
1902   Overflow = isMinSignedValue() && RHS.isAllOnesValue();
1903   return sdiv(RHS);
1904 }
1905 
1906 APInt APInt::smul_ov(const APInt &RHS, bool &Overflow) const {
1907   APInt Res = *this * RHS;
1908 
1909   if (*this != 0 && RHS != 0)
1910     Overflow = Res.sdiv(RHS) != *this || Res.sdiv(*this) != RHS;
1911   else
1912     Overflow = false;
1913   return Res;
1914 }
1915 
1916 APInt APInt::umul_ov(const APInt &RHS, bool &Overflow) const {
1917   APInt Res = *this * RHS;
1918 
1919   if (*this != 0 && RHS != 0)
1920     Overflow = Res.udiv(RHS) != *this || Res.udiv(*this) != RHS;
1921   else
1922     Overflow = false;
1923   return Res;
1924 }
1925 
1926 APInt APInt::sshl_ov(const APInt &ShAmt, bool &Overflow) const {
1927   Overflow = ShAmt.uge(getBitWidth());
1928   if (Overflow)
1929     return APInt(BitWidth, 0);
1930 
1931   if (isNonNegative()) // Don't allow sign change.
1932     Overflow = ShAmt.uge(countLeadingZeros());
1933   else
1934     Overflow = ShAmt.uge(countLeadingOnes());
1935 
1936   return *this << ShAmt;
1937 }
1938 
1939 APInt APInt::ushl_ov(const APInt &ShAmt, bool &Overflow) const {
1940   Overflow = ShAmt.uge(getBitWidth());
1941   if (Overflow)
1942     return APInt(BitWidth, 0);
1943 
1944   Overflow = ShAmt.ugt(countLeadingZeros());
1945 
1946   return *this << ShAmt;
1947 }
1948 
1949 APInt APInt::sadd_sat(const APInt &RHS) const {
1950   bool Overflow;
1951   APInt Res = sadd_ov(RHS, Overflow);
1952   if (!Overflow)
1953     return Res;
1954 
1955   return isNegative() ? APInt::getSignedMinValue(BitWidth)
1956                       : APInt::getSignedMaxValue(BitWidth);
1957 }
1958 
1959 APInt APInt::uadd_sat(const APInt &RHS) const {
1960   bool Overflow;
1961   APInt Res = uadd_ov(RHS, Overflow);
1962   if (!Overflow)
1963     return Res;
1964 
1965   return APInt::getMaxValue(BitWidth);
1966 }
1967 
1968 APInt APInt::ssub_sat(const APInt &RHS) const {
1969   bool Overflow;
1970   APInt Res = ssub_ov(RHS, Overflow);
1971   if (!Overflow)
1972     return Res;
1973 
1974   return isNegative() ? APInt::getSignedMinValue(BitWidth)
1975                       : APInt::getSignedMaxValue(BitWidth);
1976 }
1977 
1978 APInt APInt::usub_sat(const APInt &RHS) const {
1979   bool Overflow;
1980   APInt Res = usub_ov(RHS, Overflow);
1981   if (!Overflow)
1982     return Res;
1983 
1984   return APInt(BitWidth, 0);
1985 }
1986 
1987 
1988 void APInt::fromString(unsigned numbits, StringRef str, uint8_t radix) {
1989   // Check our assumptions here
1990   assert(!str.empty() && "Invalid string length");
1991   assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 ||
1992           radix == 36) &&
1993          "Radix should be 2, 8, 10, 16, or 36!");
1994 
1995   StringRef::iterator p = str.begin();
1996   size_t slen = str.size();
1997   bool isNeg = *p == '-';
1998   if (*p == '-' || *p == '+') {
1999     p++;
2000     slen--;
2001     assert(slen && "String is only a sign, needs a value.");
2002   }
2003   assert((slen <= numbits || radix != 2) && "Insufficient bit width");
2004   assert(((slen-1)*3 <= numbits || radix != 8) && "Insufficient bit width");
2005   assert(((slen-1)*4 <= numbits || radix != 16) && "Insufficient bit width");
2006   assert((((slen-1)*64)/22 <= numbits || radix != 10) &&
2007          "Insufficient bit width");
2008 
2009   // Allocate memory if needed
2010   if (isSingleWord())
2011     U.VAL = 0;
2012   else
2013     U.pVal = getClearedMemory(getNumWords());
2014 
2015   // Figure out if we can shift instead of multiply
2016   unsigned shift = (radix == 16 ? 4 : radix == 8 ? 3 : radix == 2 ? 1 : 0);
2017 
2018   // Enter digit traversal loop
2019   for (StringRef::iterator e = str.end(); p != e; ++p) {
2020     unsigned digit = getDigit(*p, radix);
2021     assert(digit < radix && "Invalid character in digit string");
2022 
2023     // Shift or multiply the value by the radix
2024     if (slen > 1) {
2025       if (shift)
2026         *this <<= shift;
2027       else
2028         *this *= radix;
2029     }
2030 
2031     // Add in the digit we just interpreted
2032     *this += digit;
2033   }
2034   // If its negative, put it in two's complement form
2035   if (isNeg)
2036     this->negate();
2037 }
2038 
2039 void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix,
2040                      bool Signed, bool formatAsCLiteral) const {
2041   assert((Radix == 10 || Radix == 8 || Radix == 16 || Radix == 2 ||
2042           Radix == 36) &&
2043          "Radix should be 2, 8, 10, 16, or 36!");
2044 
2045   const char *Prefix = "";
2046   if (formatAsCLiteral) {
2047     switch (Radix) {
2048       case 2:
2049         // Binary literals are a non-standard extension added in gcc 4.3:
2050         // http://gcc.gnu.org/onlinedocs/gcc-4.3.0/gcc/Binary-constants.html
2051         Prefix = "0b";
2052         break;
2053       case 8:
2054         Prefix = "0";
2055         break;
2056       case 10:
2057         break; // No prefix
2058       case 16:
2059         Prefix = "0x";
2060         break;
2061       default:
2062         llvm_unreachable("Invalid radix!");
2063     }
2064   }
2065 
2066   // First, check for a zero value and just short circuit the logic below.
2067   if (*this == 0) {
2068     while (*Prefix) {
2069       Str.push_back(*Prefix);
2070       ++Prefix;
2071     };
2072     Str.push_back('0');
2073     return;
2074   }
2075 
2076   static const char Digits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
2077 
2078   if (isSingleWord()) {
2079     char Buffer[65];
2080     char *BufPtr = std::end(Buffer);
2081 
2082     uint64_t N;
2083     if (!Signed) {
2084       N = getZExtValue();
2085     } else {
2086       int64_t I = getSExtValue();
2087       if (I >= 0) {
2088         N = I;
2089       } else {
2090         Str.push_back('-');
2091         N = -(uint64_t)I;
2092       }
2093     }
2094 
2095     while (*Prefix) {
2096       Str.push_back(*Prefix);
2097       ++Prefix;
2098     };
2099 
2100     while (N) {
2101       *--BufPtr = Digits[N % Radix];
2102       N /= Radix;
2103     }
2104     Str.append(BufPtr, std::end(Buffer));
2105     return;
2106   }
2107 
2108   APInt Tmp(*this);
2109 
2110   if (Signed && isNegative()) {
2111     // They want to print the signed version and it is a negative value
2112     // Flip the bits and add one to turn it into the equivalent positive
2113     // value and put a '-' in the result.
2114     Tmp.negate();
2115     Str.push_back('-');
2116   }
2117 
2118   while (*Prefix) {
2119     Str.push_back(*Prefix);
2120     ++Prefix;
2121   };
2122 
2123   // We insert the digits backward, then reverse them to get the right order.
2124   unsigned StartDig = Str.size();
2125 
2126   // For the 2, 8 and 16 bit cases, we can just shift instead of divide
2127   // because the number of bits per digit (1, 3 and 4 respectively) divides
2128   // equally.  We just shift until the value is zero.
2129   if (Radix == 2 || Radix == 8 || Radix == 16) {
2130     // Just shift tmp right for each digit width until it becomes zero
2131     unsigned ShiftAmt = (Radix == 16 ? 4 : (Radix == 8 ? 3 : 1));
2132     unsigned MaskAmt = Radix - 1;
2133 
2134     while (Tmp.getBoolValue()) {
2135       unsigned Digit = unsigned(Tmp.getRawData()[0]) & MaskAmt;
2136       Str.push_back(Digits[Digit]);
2137       Tmp.lshrInPlace(ShiftAmt);
2138     }
2139   } else {
2140     while (Tmp.getBoolValue()) {
2141       uint64_t Digit;
2142       udivrem(Tmp, Radix, Tmp, Digit);
2143       assert(Digit < Radix && "divide failed");
2144       Str.push_back(Digits[Digit]);
2145     }
2146   }
2147 
2148   // Reverse the digits before returning.
2149   std::reverse(Str.begin()+StartDig, Str.end());
2150 }
2151 
2152 /// Returns the APInt as a std::string. Note that this is an inefficient method.
2153 /// It is better to pass in a SmallVector/SmallString to the methods above.
2154 std::string APInt::toString(unsigned Radix = 10, bool Signed = true) const {
2155   SmallString<40> S;
2156   toString(S, Radix, Signed, /* formatAsCLiteral = */false);
2157   return S.str();
2158 }
2159 
2160 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2161 LLVM_DUMP_METHOD void APInt::dump() const {
2162   SmallString<40> S, U;
2163   this->toStringUnsigned(U);
2164   this->toStringSigned(S);
2165   dbgs() << "APInt(" << BitWidth << "b, "
2166          << U << "u " << S << "s)\n";
2167 }
2168 #endif
2169 
2170 void APInt::print(raw_ostream &OS, bool isSigned) const {
2171   SmallString<40> S;
2172   this->toString(S, 10, isSigned, /* formatAsCLiteral = */false);
2173   OS << S;
2174 }
2175 
2176 // This implements a variety of operations on a representation of
2177 // arbitrary precision, two's-complement, bignum integer values.
2178 
2179 // Assumed by lowHalf, highHalf, partMSB and partLSB.  A fairly safe
2180 // and unrestricting assumption.
2181 static_assert(APInt::APINT_BITS_PER_WORD % 2 == 0,
2182               "Part width must be divisible by 2!");
2183 
2184 /* Some handy functions local to this file.  */
2185 
2186 /* Returns the integer part with the least significant BITS set.
2187    BITS cannot be zero.  */
2188 static inline APInt::WordType lowBitMask(unsigned bits) {
2189   assert(bits != 0 && bits <= APInt::APINT_BITS_PER_WORD);
2190 
2191   return ~(APInt::WordType) 0 >> (APInt::APINT_BITS_PER_WORD - bits);
2192 }
2193 
2194 /* Returns the value of the lower half of PART.  */
2195 static inline APInt::WordType lowHalf(APInt::WordType part) {
2196   return part & lowBitMask(APInt::APINT_BITS_PER_WORD / 2);
2197 }
2198 
2199 /* Returns the value of the upper half of PART.  */
2200 static inline APInt::WordType highHalf(APInt::WordType part) {
2201   return part >> (APInt::APINT_BITS_PER_WORD / 2);
2202 }
2203 
2204 /* Returns the bit number of the most significant set bit of a part.
2205    If the input number has no bits set -1U is returned.  */
2206 static unsigned partMSB(APInt::WordType value) {
2207   return findLastSet(value, ZB_Max);
2208 }
2209 
2210 /* Returns the bit number of the least significant set bit of a
2211    part.  If the input number has no bits set -1U is returned.  */
2212 static unsigned partLSB(APInt::WordType value) {
2213   return findFirstSet(value, ZB_Max);
2214 }
2215 
2216 /* Sets the least significant part of a bignum to the input value, and
2217    zeroes out higher parts.  */
2218 void APInt::tcSet(WordType *dst, WordType part, unsigned parts) {
2219   assert(parts > 0);
2220 
2221   dst[0] = part;
2222   for (unsigned i = 1; i < parts; i++)
2223     dst[i] = 0;
2224 }
2225 
2226 /* Assign one bignum to another.  */
2227 void APInt::tcAssign(WordType *dst, const WordType *src, unsigned parts) {
2228   for (unsigned i = 0; i < parts; i++)
2229     dst[i] = src[i];
2230 }
2231 
2232 /* Returns true if a bignum is zero, false otherwise.  */
2233 bool APInt::tcIsZero(const WordType *src, unsigned parts) {
2234   for (unsigned i = 0; i < parts; i++)
2235     if (src[i])
2236       return false;
2237 
2238   return true;
2239 }
2240 
2241 /* Extract the given bit of a bignum; returns 0 or 1.  */
2242 int APInt::tcExtractBit(const WordType *parts, unsigned bit) {
2243   return (parts[whichWord(bit)] & maskBit(bit)) != 0;
2244 }
2245 
2246 /* Set the given bit of a bignum. */
2247 void APInt::tcSetBit(WordType *parts, unsigned bit) {
2248   parts[whichWord(bit)] |= maskBit(bit);
2249 }
2250 
2251 /* Clears the given bit of a bignum. */
2252 void APInt::tcClearBit(WordType *parts, unsigned bit) {
2253   parts[whichWord(bit)] &= ~maskBit(bit);
2254 }
2255 
2256 /* Returns the bit number of the least significant set bit of a
2257    number.  If the input number has no bits set -1U is returned.  */
2258 unsigned APInt::tcLSB(const WordType *parts, unsigned n) {
2259   for (unsigned i = 0; i < n; i++) {
2260     if (parts[i] != 0) {
2261       unsigned lsb = partLSB(parts[i]);
2262 
2263       return lsb + i * APINT_BITS_PER_WORD;
2264     }
2265   }
2266 
2267   return -1U;
2268 }
2269 
2270 /* Returns the bit number of the most significant set bit of a number.
2271    If the input number has no bits set -1U is returned.  */
2272 unsigned APInt::tcMSB(const WordType *parts, unsigned n) {
2273   do {
2274     --n;
2275 
2276     if (parts[n] != 0) {
2277       unsigned msb = partMSB(parts[n]);
2278 
2279       return msb + n * APINT_BITS_PER_WORD;
2280     }
2281   } while (n);
2282 
2283   return -1U;
2284 }
2285 
2286 /* Copy the bit vector of width srcBITS from SRC, starting at bit
2287    srcLSB, to DST, of dstCOUNT parts, such that the bit srcLSB becomes
2288    the least significant bit of DST.  All high bits above srcBITS in
2289    DST are zero-filled.  */
2290 void
2291 APInt::tcExtract(WordType *dst, unsigned dstCount, const WordType *src,
2292                  unsigned srcBits, unsigned srcLSB) {
2293   unsigned dstParts = (srcBits + APINT_BITS_PER_WORD - 1) / APINT_BITS_PER_WORD;
2294   assert(dstParts <= dstCount);
2295 
2296   unsigned firstSrcPart = srcLSB / APINT_BITS_PER_WORD;
2297   tcAssign (dst, src + firstSrcPart, dstParts);
2298 
2299   unsigned shift = srcLSB % APINT_BITS_PER_WORD;
2300   tcShiftRight (dst, dstParts, shift);
2301 
2302   /* We now have (dstParts * APINT_BITS_PER_WORD - shift) bits from SRC
2303      in DST.  If this is less that srcBits, append the rest, else
2304      clear the high bits.  */
2305   unsigned n = dstParts * APINT_BITS_PER_WORD - shift;
2306   if (n < srcBits) {
2307     WordType mask = lowBitMask (srcBits - n);
2308     dst[dstParts - 1] |= ((src[firstSrcPart + dstParts] & mask)
2309                           << n % APINT_BITS_PER_WORD);
2310   } else if (n > srcBits) {
2311     if (srcBits % APINT_BITS_PER_WORD)
2312       dst[dstParts - 1] &= lowBitMask (srcBits % APINT_BITS_PER_WORD);
2313   }
2314 
2315   /* Clear high parts.  */
2316   while (dstParts < dstCount)
2317     dst[dstParts++] = 0;
2318 }
2319 
2320 /* DST += RHS + C where C is zero or one.  Returns the carry flag.  */
2321 APInt::WordType APInt::tcAdd(WordType *dst, const WordType *rhs,
2322                              WordType c, unsigned parts) {
2323   assert(c <= 1);
2324 
2325   for (unsigned i = 0; i < parts; i++) {
2326     WordType l = dst[i];
2327     if (c) {
2328       dst[i] += rhs[i] + 1;
2329       c = (dst[i] <= l);
2330     } else {
2331       dst[i] += rhs[i];
2332       c = (dst[i] < l);
2333     }
2334   }
2335 
2336   return c;
2337 }
2338 
2339 /// This function adds a single "word" integer, src, to the multiple
2340 /// "word" integer array, dst[]. dst[] is modified to reflect the addition and
2341 /// 1 is returned if there is a carry out, otherwise 0 is returned.
2342 /// @returns the carry of the addition.
2343 APInt::WordType APInt::tcAddPart(WordType *dst, WordType src,
2344                                  unsigned parts) {
2345   for (unsigned i = 0; i < parts; ++i) {
2346     dst[i] += src;
2347     if (dst[i] >= src)
2348       return 0; // No need to carry so exit early.
2349     src = 1; // Carry one to next digit.
2350   }
2351 
2352   return 1;
2353 }
2354 
2355 /* DST -= RHS + C where C is zero or one.  Returns the carry flag.  */
2356 APInt::WordType APInt::tcSubtract(WordType *dst, const WordType *rhs,
2357                                   WordType c, unsigned parts) {
2358   assert(c <= 1);
2359 
2360   for (unsigned i = 0; i < parts; i++) {
2361     WordType l = dst[i];
2362     if (c) {
2363       dst[i] -= rhs[i] + 1;
2364       c = (dst[i] >= l);
2365     } else {
2366       dst[i] -= rhs[i];
2367       c = (dst[i] > l);
2368     }
2369   }
2370 
2371   return c;
2372 }
2373 
2374 /// This function subtracts a single "word" (64-bit word), src, from
2375 /// the multi-word integer array, dst[], propagating the borrowed 1 value until
2376 /// no further borrowing is needed or it runs out of "words" in dst.  The result
2377 /// is 1 if "borrowing" exhausted the digits in dst, or 0 if dst was not
2378 /// exhausted. In other words, if src > dst then this function returns 1,
2379 /// otherwise 0.
2380 /// @returns the borrow out of the subtraction
2381 APInt::WordType APInt::tcSubtractPart(WordType *dst, WordType src,
2382                                       unsigned parts) {
2383   for (unsigned i = 0; i < parts; ++i) {
2384     WordType Dst = dst[i];
2385     dst[i] -= src;
2386     if (src <= Dst)
2387       return 0; // No need to borrow so exit early.
2388     src = 1; // We have to "borrow 1" from next "word"
2389   }
2390 
2391   return 1;
2392 }
2393 
2394 /* Negate a bignum in-place.  */
2395 void APInt::tcNegate(WordType *dst, unsigned parts) {
2396   tcComplement(dst, parts);
2397   tcIncrement(dst, parts);
2398 }
2399 
2400 /*  DST += SRC * MULTIPLIER + CARRY   if add is true
2401     DST  = SRC * MULTIPLIER + CARRY   if add is false
2402 
2403     Requires 0 <= DSTPARTS <= SRCPARTS + 1.  If DST overlaps SRC
2404     they must start at the same point, i.e. DST == SRC.
2405 
2406     If DSTPARTS == SRCPARTS + 1 no overflow occurs and zero is
2407     returned.  Otherwise DST is filled with the least significant
2408     DSTPARTS parts of the result, and if all of the omitted higher
2409     parts were zero return zero, otherwise overflow occurred and
2410     return one.  */
2411 int APInt::tcMultiplyPart(WordType *dst, const WordType *src,
2412                           WordType multiplier, WordType carry,
2413                           unsigned srcParts, unsigned dstParts,
2414                           bool add) {
2415   /* Otherwise our writes of DST kill our later reads of SRC.  */
2416   assert(dst <= src || dst >= src + srcParts);
2417   assert(dstParts <= srcParts + 1);
2418 
2419   /* N loops; minimum of dstParts and srcParts.  */
2420   unsigned n = std::min(dstParts, srcParts);
2421 
2422   for (unsigned i = 0; i < n; i++) {
2423     WordType low, mid, high, srcPart;
2424 
2425       /* [ LOW, HIGH ] = MULTIPLIER * SRC[i] + DST[i] + CARRY.
2426 
2427          This cannot overflow, because
2428 
2429          (n - 1) * (n - 1) + 2 (n - 1) = (n - 1) * (n + 1)
2430 
2431          which is less than n^2.  */
2432 
2433     srcPart = src[i];
2434 
2435     if (multiplier == 0 || srcPart == 0) {
2436       low = carry;
2437       high = 0;
2438     } else {
2439       low = lowHalf(srcPart) * lowHalf(multiplier);
2440       high = highHalf(srcPart) * highHalf(multiplier);
2441 
2442       mid = lowHalf(srcPart) * highHalf(multiplier);
2443       high += highHalf(mid);
2444       mid <<= APINT_BITS_PER_WORD / 2;
2445       if (low + mid < low)
2446         high++;
2447       low += mid;
2448 
2449       mid = highHalf(srcPart) * lowHalf(multiplier);
2450       high += highHalf(mid);
2451       mid <<= APINT_BITS_PER_WORD / 2;
2452       if (low + mid < low)
2453         high++;
2454       low += mid;
2455 
2456       /* Now add carry.  */
2457       if (low + carry < low)
2458         high++;
2459       low += carry;
2460     }
2461 
2462     if (add) {
2463       /* And now DST[i], and store the new low part there.  */
2464       if (low + dst[i] < low)
2465         high++;
2466       dst[i] += low;
2467     } else
2468       dst[i] = low;
2469 
2470     carry = high;
2471   }
2472 
2473   if (srcParts < dstParts) {
2474     /* Full multiplication, there is no overflow.  */
2475     assert(srcParts + 1 == dstParts);
2476     dst[srcParts] = carry;
2477     return 0;
2478   }
2479 
2480   /* We overflowed if there is carry.  */
2481   if (carry)
2482     return 1;
2483 
2484   /* We would overflow if any significant unwritten parts would be
2485      non-zero.  This is true if any remaining src parts are non-zero
2486      and the multiplier is non-zero.  */
2487   if (multiplier)
2488     for (unsigned i = dstParts; i < srcParts; i++)
2489       if (src[i])
2490         return 1;
2491 
2492   /* We fitted in the narrow destination.  */
2493   return 0;
2494 }
2495 
2496 /* DST = LHS * RHS, where DST has the same width as the operands and
2497    is filled with the least significant parts of the result.  Returns
2498    one if overflow occurred, otherwise zero.  DST must be disjoint
2499    from both operands.  */
2500 int APInt::tcMultiply(WordType *dst, const WordType *lhs,
2501                       const WordType *rhs, unsigned parts) {
2502   assert(dst != lhs && dst != rhs);
2503 
2504   int overflow = 0;
2505   tcSet(dst, 0, parts);
2506 
2507   for (unsigned i = 0; i < parts; i++)
2508     overflow |= tcMultiplyPart(&dst[i], lhs, rhs[i], 0, parts,
2509                                parts - i, true);
2510 
2511   return overflow;
2512 }
2513 
2514 /// DST = LHS * RHS, where DST has width the sum of the widths of the
2515 /// operands. No overflow occurs. DST must be disjoint from both operands.
2516 void APInt::tcFullMultiply(WordType *dst, const WordType *lhs,
2517                            const WordType *rhs, unsigned lhsParts,
2518                            unsigned rhsParts) {
2519   /* Put the narrower number on the LHS for less loops below.  */
2520   if (lhsParts > rhsParts)
2521     return tcFullMultiply (dst, rhs, lhs, rhsParts, lhsParts);
2522 
2523   assert(dst != lhs && dst != rhs);
2524 
2525   tcSet(dst, 0, rhsParts);
2526 
2527   for (unsigned i = 0; i < lhsParts; i++)
2528     tcMultiplyPart(&dst[i], rhs, lhs[i], 0, rhsParts, rhsParts + 1, true);
2529 }
2530 
2531 /* If RHS is zero LHS and REMAINDER are left unchanged, return one.
2532    Otherwise set LHS to LHS / RHS with the fractional part discarded,
2533    set REMAINDER to the remainder, return zero.  i.e.
2534 
2535    OLD_LHS = RHS * LHS + REMAINDER
2536 
2537    SCRATCH is a bignum of the same size as the operands and result for
2538    use by the routine; its contents need not be initialized and are
2539    destroyed.  LHS, REMAINDER and SCRATCH must be distinct.
2540 */
2541 int APInt::tcDivide(WordType *lhs, const WordType *rhs,
2542                     WordType *remainder, WordType *srhs,
2543                     unsigned parts) {
2544   assert(lhs != remainder && lhs != srhs && remainder != srhs);
2545 
2546   unsigned shiftCount = tcMSB(rhs, parts) + 1;
2547   if (shiftCount == 0)
2548     return true;
2549 
2550   shiftCount = parts * APINT_BITS_PER_WORD - shiftCount;
2551   unsigned n = shiftCount / APINT_BITS_PER_WORD;
2552   WordType mask = (WordType) 1 << (shiftCount % APINT_BITS_PER_WORD);
2553 
2554   tcAssign(srhs, rhs, parts);
2555   tcShiftLeft(srhs, parts, shiftCount);
2556   tcAssign(remainder, lhs, parts);
2557   tcSet(lhs, 0, parts);
2558 
2559   /* Loop, subtracting SRHS if REMAINDER is greater and adding that to
2560      the total.  */
2561   for (;;) {
2562     int compare = tcCompare(remainder, srhs, parts);
2563     if (compare >= 0) {
2564       tcSubtract(remainder, srhs, 0, parts);
2565       lhs[n] |= mask;
2566     }
2567 
2568     if (shiftCount == 0)
2569       break;
2570     shiftCount--;
2571     tcShiftRight(srhs, parts, 1);
2572     if ((mask >>= 1) == 0) {
2573       mask = (WordType) 1 << (APINT_BITS_PER_WORD - 1);
2574       n--;
2575     }
2576   }
2577 
2578   return false;
2579 }
2580 
2581 /// Shift a bignum left Cound bits in-place. Shifted in bits are zero. There are
2582 /// no restrictions on Count.
2583 void APInt::tcShiftLeft(WordType *Dst, unsigned Words, unsigned Count) {
2584   // Don't bother performing a no-op shift.
2585   if (!Count)
2586     return;
2587 
2588   // WordShift is the inter-part shift; BitShift is the intra-part shift.
2589   unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words);
2590   unsigned BitShift = Count % APINT_BITS_PER_WORD;
2591 
2592   // Fastpath for moving by whole words.
2593   if (BitShift == 0) {
2594     std::memmove(Dst + WordShift, Dst, (Words - WordShift) * APINT_WORD_SIZE);
2595   } else {
2596     while (Words-- > WordShift) {
2597       Dst[Words] = Dst[Words - WordShift] << BitShift;
2598       if (Words > WordShift)
2599         Dst[Words] |=
2600           Dst[Words - WordShift - 1] >> (APINT_BITS_PER_WORD - BitShift);
2601     }
2602   }
2603 
2604   // Fill in the remainder with 0s.
2605   std::memset(Dst, 0, WordShift * APINT_WORD_SIZE);
2606 }
2607 
2608 /// Shift a bignum right Count bits in-place. Shifted in bits are zero. There
2609 /// are no restrictions on Count.
2610 void APInt::tcShiftRight(WordType *Dst, unsigned Words, unsigned Count) {
2611   // Don't bother performing a no-op shift.
2612   if (!Count)
2613     return;
2614 
2615   // WordShift is the inter-part shift; BitShift is the intra-part shift.
2616   unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words);
2617   unsigned BitShift = Count % APINT_BITS_PER_WORD;
2618 
2619   unsigned WordsToMove = Words - WordShift;
2620   // Fastpath for moving by whole words.
2621   if (BitShift == 0) {
2622     std::memmove(Dst, Dst + WordShift, WordsToMove * APINT_WORD_SIZE);
2623   } else {
2624     for (unsigned i = 0; i != WordsToMove; ++i) {
2625       Dst[i] = Dst[i + WordShift] >> BitShift;
2626       if (i + 1 != WordsToMove)
2627         Dst[i] |= Dst[i + WordShift + 1] << (APINT_BITS_PER_WORD - BitShift);
2628     }
2629   }
2630 
2631   // Fill in the remainder with 0s.
2632   std::memset(Dst + WordsToMove, 0, WordShift * APINT_WORD_SIZE);
2633 }
2634 
2635 /* Bitwise and of two bignums.  */
2636 void APInt::tcAnd(WordType *dst, const WordType *rhs, unsigned parts) {
2637   for (unsigned i = 0; i < parts; i++)
2638     dst[i] &= rhs[i];
2639 }
2640 
2641 /* Bitwise inclusive or of two bignums.  */
2642 void APInt::tcOr(WordType *dst, const WordType *rhs, unsigned parts) {
2643   for (unsigned i = 0; i < parts; i++)
2644     dst[i] |= rhs[i];
2645 }
2646 
2647 /* Bitwise exclusive or of two bignums.  */
2648 void APInt::tcXor(WordType *dst, const WordType *rhs, unsigned parts) {
2649   for (unsigned i = 0; i < parts; i++)
2650     dst[i] ^= rhs[i];
2651 }
2652 
2653 /* Complement a bignum in-place.  */
2654 void APInt::tcComplement(WordType *dst, unsigned parts) {
2655   for (unsigned i = 0; i < parts; i++)
2656     dst[i] = ~dst[i];
2657 }
2658 
2659 /* Comparison (unsigned) of two bignums.  */
2660 int APInt::tcCompare(const WordType *lhs, const WordType *rhs,
2661                      unsigned parts) {
2662   while (parts) {
2663     parts--;
2664     if (lhs[parts] != rhs[parts])
2665       return (lhs[parts] > rhs[parts]) ? 1 : -1;
2666   }
2667 
2668   return 0;
2669 }
2670 
2671 /* Set the least significant BITS bits of a bignum, clear the
2672    rest.  */
2673 void APInt::tcSetLeastSignificantBits(WordType *dst, unsigned parts,
2674                                       unsigned bits) {
2675   unsigned i = 0;
2676   while (bits > APINT_BITS_PER_WORD) {
2677     dst[i++] = ~(WordType) 0;
2678     bits -= APINT_BITS_PER_WORD;
2679   }
2680 
2681   if (bits)
2682     dst[i++] = ~(WordType) 0 >> (APINT_BITS_PER_WORD - bits);
2683 
2684   while (i < parts)
2685     dst[i++] = 0;
2686 }
2687 
2688 APInt llvm::APIntOps::RoundingUDiv(const APInt &A, const APInt &B,
2689                                    APInt::Rounding RM) {
2690   // Currently udivrem always rounds down.
2691   switch (RM) {
2692   case APInt::Rounding::DOWN:
2693   case APInt::Rounding::TOWARD_ZERO:
2694     return A.udiv(B);
2695   case APInt::Rounding::UP: {
2696     APInt Quo, Rem;
2697     APInt::udivrem(A, B, Quo, Rem);
2698     if (Rem == 0)
2699       return Quo;
2700     return Quo + 1;
2701   }
2702   }
2703   llvm_unreachable("Unknown APInt::Rounding enum");
2704 }
2705 
2706 APInt llvm::APIntOps::RoundingSDiv(const APInt &A, const APInt &B,
2707                                    APInt::Rounding RM) {
2708   switch (RM) {
2709   case APInt::Rounding::DOWN:
2710   case APInt::Rounding::UP: {
2711     APInt Quo, Rem;
2712     APInt::sdivrem(A, B, Quo, Rem);
2713     if (Rem == 0)
2714       return Quo;
2715     // This algorithm deals with arbitrary rounding mode used by sdivrem.
2716     // We want to check whether the non-integer part of the mathematical value
2717     // is negative or not. If the non-integer part is negative, we need to round
2718     // down from Quo; otherwise, if it's positive or 0, we return Quo, as it's
2719     // already rounded down.
2720     if (RM == APInt::Rounding::DOWN) {
2721       if (Rem.isNegative() != B.isNegative())
2722         return Quo - 1;
2723       return Quo;
2724     }
2725     if (Rem.isNegative() != B.isNegative())
2726       return Quo;
2727     return Quo + 1;
2728   }
2729   // Currently sdiv rounds twards zero.
2730   case APInt::Rounding::TOWARD_ZERO:
2731     return A.sdiv(B);
2732   }
2733   llvm_unreachable("Unknown APInt::Rounding enum");
2734 }
2735 
2736 Optional<APInt>
2737 llvm::APIntOps::SolveQuadraticEquationWrap(APInt A, APInt B, APInt C,
2738                                            unsigned RangeWidth) {
2739   unsigned CoeffWidth = A.getBitWidth();
2740   assert(CoeffWidth == B.getBitWidth() && CoeffWidth == C.getBitWidth());
2741   assert(RangeWidth <= CoeffWidth &&
2742          "Value range width should be less than coefficient width");
2743   assert(RangeWidth > 1 && "Value range bit width should be > 1");
2744 
2745   LLVM_DEBUG(dbgs() << __func__ << ": solving " << A << "x^2 + " << B
2746                     << "x + " << C << ", rw:" << RangeWidth << '\n');
2747 
2748   // Identify 0 as a (non)solution immediately.
2749   if (C.sextOrTrunc(RangeWidth).isNullValue() ) {
2750     LLVM_DEBUG(dbgs() << __func__ << ": zero solution\n");
2751     return APInt(CoeffWidth, 0);
2752   }
2753 
2754   // The result of APInt arithmetic has the same bit width as the operands,
2755   // so it can actually lose high bits. A product of two n-bit integers needs
2756   // 2n-1 bits to represent the full value.
2757   // The operation done below (on quadratic coefficients) that can produce
2758   // the largest value is the evaluation of the equation during bisection,
2759   // which needs 3 times the bitwidth of the coefficient, so the total number
2760   // of required bits is 3n.
2761   //
2762   // The purpose of this extension is to simulate the set Z of all integers,
2763   // where n+1 > n for all n in Z. In Z it makes sense to talk about positive
2764   // and negative numbers (not so much in a modulo arithmetic). The method
2765   // used to solve the equation is based on the standard formula for real
2766   // numbers, and uses the concepts of "positive" and "negative" with their
2767   // usual meanings.
2768   CoeffWidth *= 3;
2769   A = A.sext(CoeffWidth);
2770   B = B.sext(CoeffWidth);
2771   C = C.sext(CoeffWidth);
2772 
2773   // Make A > 0 for simplicity. Negate cannot overflow at this point because
2774   // the bit width has increased.
2775   if (A.isNegative()) {
2776     A.negate();
2777     B.negate();
2778     C.negate();
2779   }
2780 
2781   // Solving an equation q(x) = 0 with coefficients in modular arithmetic
2782   // is really solving a set of equations q(x) = kR for k = 0, 1, 2, ...,
2783   // and R = 2^BitWidth.
2784   // Since we're trying not only to find exact solutions, but also values
2785   // that "wrap around", such a set will always have a solution, i.e. an x
2786   // that satisfies at least one of the equations, or such that |q(x)|
2787   // exceeds kR, while |q(x-1)| for the same k does not.
2788   //
2789   // We need to find a value k, such that Ax^2 + Bx + C = kR will have a
2790   // positive solution n (in the above sense), and also such that the n
2791   // will be the least among all solutions corresponding to k = 0, 1, ...
2792   // (more precisely, the least element in the set
2793   //   { n(k) | k is such that a solution n(k) exists }).
2794   //
2795   // Consider the parabola (over real numbers) that corresponds to the
2796   // quadratic equation. Since A > 0, the arms of the parabola will point
2797   // up. Picking different values of k will shift it up and down by R.
2798   //
2799   // We want to shift the parabola in such a way as to reduce the problem
2800   // of solving q(x) = kR to solving shifted_q(x) = 0.
2801   // (The interesting solutions are the ceilings of the real number
2802   // solutions.)
2803   APInt R = APInt::getOneBitSet(CoeffWidth, RangeWidth);
2804   APInt TwoA = 2 * A;
2805   APInt SqrB = B * B;
2806   bool PickLow;
2807 
2808   auto RoundUp = [] (const APInt &V, const APInt &A) -> APInt {
2809     assert(A.isStrictlyPositive());
2810     APInt T = V.abs().urem(A);
2811     if (T.isNullValue())
2812       return V;
2813     return V.isNegative() ? V+T : V+(A-T);
2814   };
2815 
2816   // The vertex of the parabola is at -B/2A, but since A > 0, it's negative
2817   // iff B is positive.
2818   if (B.isNonNegative()) {
2819     // If B >= 0, the vertex it at a negative location (or at 0), so in
2820     // order to have a non-negative solution we need to pick k that makes
2821     // C-kR negative. To satisfy all the requirements for the solution
2822     // that we are looking for, it needs to be closest to 0 of all k.
2823     C = C.srem(R);
2824     if (C.isStrictlyPositive())
2825       C -= R;
2826     // Pick the greater solution.
2827     PickLow = false;
2828   } else {
2829     // If B < 0, the vertex is at a positive location. For any solution
2830     // to exist, the discriminant must be non-negative. This means that
2831     // C-kR <= B^2/4A is a necessary condition for k, i.e. there is a
2832     // lower bound on values of k: kR >= C - B^2/4A.
2833     APInt LowkR = C - SqrB.udiv(2*TwoA); // udiv because all values > 0.
2834     // Round LowkR up (towards +inf) to the nearest kR.
2835     LowkR = RoundUp(LowkR, R);
2836 
2837     // If there exists k meeting the condition above, and such that
2838     // C-kR > 0, there will be two positive real number solutions of
2839     // q(x) = kR. Out of all such values of k, pick the one that makes
2840     // C-kR closest to 0, (i.e. pick maximum k such that C-kR > 0).
2841     // In other words, find maximum k such that LowkR <= kR < C.
2842     if (C.sgt(LowkR)) {
2843       // If LowkR < C, then such a k is guaranteed to exist because
2844       // LowkR itself is a multiple of R.
2845       C -= -RoundUp(-C, R);      // C = C - RoundDown(C, R)
2846       // Pick the smaller solution.
2847       PickLow = true;
2848     } else {
2849       // If C-kR < 0 for all potential k's, it means that one solution
2850       // will be negative, while the other will be positive. The positive
2851       // solution will shift towards 0 if the parabola is moved up.
2852       // Pick the kR closest to the lower bound (i.e. make C-kR closest
2853       // to 0, or in other words, out of all parabolas that have solutions,
2854       // pick the one that is the farthest "up").
2855       // Since LowkR is itself a multiple of R, simply take C-LowkR.
2856       C -= LowkR;
2857       // Pick the greater solution.
2858       PickLow = false;
2859     }
2860   }
2861 
2862   LLVM_DEBUG(dbgs() << __func__ << ": updated coefficients " << A << "x^2 + "
2863                     << B << "x + " << C << ", rw:" << RangeWidth << '\n');
2864 
2865   APInt D = SqrB - 4*A*C;
2866   assert(D.isNonNegative() && "Negative discriminant");
2867   APInt SQ = D.sqrt();
2868 
2869   APInt Q = SQ * SQ;
2870   bool InexactSQ = Q != D;
2871   // The calculated SQ may actually be greater than the exact (non-integer)
2872   // value. If that's the case, decremement SQ to get a value that is lower.
2873   if (Q.sgt(D))
2874     SQ -= 1;
2875 
2876   APInt X;
2877   APInt Rem;
2878 
2879   // SQ is rounded down (i.e SQ * SQ <= D), so the roots may be inexact.
2880   // When using the quadratic formula directly, the calculated low root
2881   // may be greater than the exact one, since we would be subtracting SQ.
2882   // To make sure that the calculated root is not greater than the exact
2883   // one, subtract SQ+1 when calculating the low root (for inexact value
2884   // of SQ).
2885   if (PickLow)
2886     APInt::sdivrem(-B - (SQ+InexactSQ), TwoA, X, Rem);
2887   else
2888     APInt::sdivrem(-B + SQ, TwoA, X, Rem);
2889 
2890   // The updated coefficients should be such that the (exact) solution is
2891   // positive. Since APInt division rounds towards 0, the calculated one
2892   // can be 0, but cannot be negative.
2893   assert(X.isNonNegative() && "Solution should be non-negative");
2894 
2895   if (!InexactSQ && Rem.isNullValue()) {
2896     LLVM_DEBUG(dbgs() << __func__ << ": solution (root): " << X << '\n');
2897     return X;
2898   }
2899 
2900   assert((SQ*SQ).sle(D) && "SQ = |_sqrt(D)_|, so SQ*SQ <= D");
2901   // The exact value of the square root of D should be between SQ and SQ+1.
2902   // This implies that the solution should be between that corresponding to
2903   // SQ (i.e. X) and that corresponding to SQ+1.
2904   //
2905   // The calculated X cannot be greater than the exact (real) solution.
2906   // Actually it must be strictly less than the exact solution, while
2907   // X+1 will be greater than or equal to it.
2908 
2909   APInt VX = (A*X + B)*X + C;
2910   APInt VY = VX + TwoA*X + A + B;
2911   bool SignChange = VX.isNegative() != VY.isNegative() ||
2912                     VX.isNullValue() != VY.isNullValue();
2913   // If the sign did not change between X and X+1, X is not a valid solution.
2914   // This could happen when the actual (exact) roots don't have an integer
2915   // between them, so they would both be contained between X and X+1.
2916   if (!SignChange) {
2917     LLVM_DEBUG(dbgs() << __func__ << ": no valid solution\n");
2918     return None;
2919   }
2920 
2921   X += 1;
2922   LLVM_DEBUG(dbgs() << __func__ << ": solution (wrap): " << X << '\n');
2923   return X;
2924 }
2925