1 //===-- APInt.cpp - Implement APInt class ---------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a class to represent arbitrary precision integer
10 // constant values and provide a variety of arithmetic operations on them.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/ADT/APInt.h"
15 #include "llvm/ADT/ArrayRef.h"
16 #include "llvm/ADT/FoldingSet.h"
17 #include "llvm/ADT/Hashing.h"
18 #include "llvm/ADT/Optional.h"
19 #include "llvm/ADT/SmallString.h"
20 #include "llvm/ADT/StringRef.h"
21 #include "llvm/ADT/bit.h"
22 #include "llvm/Config/llvm-config.h"
23 #include "llvm/Support/Debug.h"
24 #include "llvm/Support/ErrorHandling.h"
25 #include "llvm/Support/MathExtras.h"
26 #include "llvm/Support/raw_ostream.h"
27 #include <climits>
28 #include <cmath>
29 #include <cstdlib>
30 #include <cstring>
31 using namespace llvm;
32 
33 #define DEBUG_TYPE "apint"
34 
35 /// A utility function for allocating memory, checking for allocation failures,
36 /// and ensuring the contents are zeroed.
37 inline static uint64_t* getClearedMemory(unsigned numWords) {
38   uint64_t *result = new uint64_t[numWords];
39   memset(result, 0, numWords * sizeof(uint64_t));
40   return result;
41 }
42 
43 /// A utility function for allocating memory and checking for allocation
44 /// failure.  The content is not zeroed.
45 inline static uint64_t* getMemory(unsigned numWords) {
46   return new uint64_t[numWords];
47 }
48 
49 /// A utility function that converts a character to a digit.
50 inline static unsigned getDigit(char cdigit, uint8_t radix) {
51   unsigned r;
52 
53   if (radix == 16 || radix == 36) {
54     r = cdigit - '0';
55     if (r <= 9)
56       return r;
57 
58     r = cdigit - 'A';
59     if (r <= radix - 11U)
60       return r + 10;
61 
62     r = cdigit - 'a';
63     if (r <= radix - 11U)
64       return r + 10;
65 
66     radix = 10;
67   }
68 
69   r = cdigit - '0';
70   if (r < radix)
71     return r;
72 
73   return -1U;
74 }
75 
76 
77 void APInt::initSlowCase(uint64_t val, bool isSigned) {
78   U.pVal = getClearedMemory(getNumWords());
79   U.pVal[0] = val;
80   if (isSigned && int64_t(val) < 0)
81     for (unsigned i = 1; i < getNumWords(); ++i)
82       U.pVal[i] = WORDTYPE_MAX;
83   clearUnusedBits();
84 }
85 
86 void APInt::initSlowCase(const APInt& that) {
87   U.pVal = getMemory(getNumWords());
88   memcpy(U.pVal, that.U.pVal, getNumWords() * APINT_WORD_SIZE);
89 }
90 
91 void APInt::initFromArray(ArrayRef<uint64_t> bigVal) {
92   assert(BitWidth && "Bitwidth too small");
93   assert(bigVal.data() && "Null pointer detected!");
94   if (isSingleWord())
95     U.VAL = bigVal[0];
96   else {
97     // Get memory, cleared to 0
98     U.pVal = getClearedMemory(getNumWords());
99     // Calculate the number of words to copy
100     unsigned words = std::min<unsigned>(bigVal.size(), getNumWords());
101     // Copy the words from bigVal to pVal
102     memcpy(U.pVal, bigVal.data(), words * APINT_WORD_SIZE);
103   }
104   // Make sure unused high bits are cleared
105   clearUnusedBits();
106 }
107 
108 APInt::APInt(unsigned numBits, ArrayRef<uint64_t> bigVal)
109   : BitWidth(numBits) {
110   initFromArray(bigVal);
111 }
112 
113 APInt::APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[])
114   : BitWidth(numBits) {
115   initFromArray(makeArrayRef(bigVal, numWords));
116 }
117 
118 APInt::APInt(unsigned numbits, StringRef Str, uint8_t radix)
119   : BitWidth(numbits) {
120   assert(BitWidth && "Bitwidth too small");
121   fromString(numbits, Str, radix);
122 }
123 
124 void APInt::reallocate(unsigned NewBitWidth) {
125   // If the number of words is the same we can just change the width and stop.
126   if (getNumWords() == getNumWords(NewBitWidth)) {
127     BitWidth = NewBitWidth;
128     return;
129   }
130 
131   // If we have an allocation, delete it.
132   if (!isSingleWord())
133     delete [] U.pVal;
134 
135   // Update BitWidth.
136   BitWidth = NewBitWidth;
137 
138   // If we are supposed to have an allocation, create it.
139   if (!isSingleWord())
140     U.pVal = getMemory(getNumWords());
141 }
142 
143 void APInt::AssignSlowCase(const APInt& RHS) {
144   // Don't do anything for X = X
145   if (this == &RHS)
146     return;
147 
148   // Adjust the bit width and handle allocations as necessary.
149   reallocate(RHS.getBitWidth());
150 
151   // Copy the data.
152   if (isSingleWord())
153     U.VAL = RHS.U.VAL;
154   else
155     memcpy(U.pVal, RHS.U.pVal, getNumWords() * APINT_WORD_SIZE);
156 }
157 
158 /// This method 'profiles' an APInt for use with FoldingSet.
159 void APInt::Profile(FoldingSetNodeID& ID) const {
160   ID.AddInteger(BitWidth);
161 
162   if (isSingleWord()) {
163     ID.AddInteger(U.VAL);
164     return;
165   }
166 
167   unsigned NumWords = getNumWords();
168   for (unsigned i = 0; i < NumWords; ++i)
169     ID.AddInteger(U.pVal[i]);
170 }
171 
172 /// Prefix increment operator. Increments the APInt by one.
173 APInt& APInt::operator++() {
174   if (isSingleWord())
175     ++U.VAL;
176   else
177     tcIncrement(U.pVal, getNumWords());
178   return clearUnusedBits();
179 }
180 
181 /// Prefix decrement operator. Decrements the APInt by one.
182 APInt& APInt::operator--() {
183   if (isSingleWord())
184     --U.VAL;
185   else
186     tcDecrement(U.pVal, getNumWords());
187   return clearUnusedBits();
188 }
189 
190 /// Adds the RHS APint to this APInt.
191 /// @returns this, after addition of RHS.
192 /// Addition assignment operator.
193 APInt& APInt::operator+=(const APInt& RHS) {
194   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
195   if (isSingleWord())
196     U.VAL += RHS.U.VAL;
197   else
198     tcAdd(U.pVal, RHS.U.pVal, 0, getNumWords());
199   return clearUnusedBits();
200 }
201 
202 APInt& APInt::operator+=(uint64_t RHS) {
203   if (isSingleWord())
204     U.VAL += RHS;
205   else
206     tcAddPart(U.pVal, RHS, getNumWords());
207   return clearUnusedBits();
208 }
209 
210 /// Subtracts the RHS APInt from this APInt
211 /// @returns this, after subtraction
212 /// Subtraction assignment operator.
213 APInt& APInt::operator-=(const APInt& RHS) {
214   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
215   if (isSingleWord())
216     U.VAL -= RHS.U.VAL;
217   else
218     tcSubtract(U.pVal, RHS.U.pVal, 0, getNumWords());
219   return clearUnusedBits();
220 }
221 
222 APInt& APInt::operator-=(uint64_t RHS) {
223   if (isSingleWord())
224     U.VAL -= RHS;
225   else
226     tcSubtractPart(U.pVal, RHS, getNumWords());
227   return clearUnusedBits();
228 }
229 
230 APInt APInt::operator*(const APInt& RHS) const {
231   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
232   if (isSingleWord())
233     return APInt(BitWidth, U.VAL * RHS.U.VAL);
234 
235   APInt Result(getMemory(getNumWords()), getBitWidth());
236 
237   tcMultiply(Result.U.pVal, U.pVal, RHS.U.pVal, getNumWords());
238 
239   Result.clearUnusedBits();
240   return Result;
241 }
242 
243 void APInt::AndAssignSlowCase(const APInt& RHS) {
244   tcAnd(U.pVal, RHS.U.pVal, getNumWords());
245 }
246 
247 void APInt::OrAssignSlowCase(const APInt& RHS) {
248   tcOr(U.pVal, RHS.U.pVal, getNumWords());
249 }
250 
251 void APInt::XorAssignSlowCase(const APInt& RHS) {
252   tcXor(U.pVal, RHS.U.pVal, getNumWords());
253 }
254 
255 APInt& APInt::operator*=(const APInt& RHS) {
256   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
257   *this = *this * RHS;
258   return *this;
259 }
260 
261 APInt& APInt::operator*=(uint64_t RHS) {
262   if (isSingleWord()) {
263     U.VAL *= RHS;
264   } else {
265     unsigned NumWords = getNumWords();
266     tcMultiplyPart(U.pVal, U.pVal, RHS, 0, NumWords, NumWords, false);
267   }
268   return clearUnusedBits();
269 }
270 
271 bool APInt::EqualSlowCase(const APInt& RHS) const {
272   return std::equal(U.pVal, U.pVal + getNumWords(), RHS.U.pVal);
273 }
274 
275 int APInt::compare(const APInt& RHS) const {
276   assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
277   if (isSingleWord())
278     return U.VAL < RHS.U.VAL ? -1 : U.VAL > RHS.U.VAL;
279 
280   return tcCompare(U.pVal, RHS.U.pVal, getNumWords());
281 }
282 
283 int APInt::compareSigned(const APInt& RHS) const {
284   assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
285   if (isSingleWord()) {
286     int64_t lhsSext = SignExtend64(U.VAL, BitWidth);
287     int64_t rhsSext = SignExtend64(RHS.U.VAL, BitWidth);
288     return lhsSext < rhsSext ? -1 : lhsSext > rhsSext;
289   }
290 
291   bool lhsNeg = isNegative();
292   bool rhsNeg = RHS.isNegative();
293 
294   // If the sign bits don't match, then (LHS < RHS) if LHS is negative
295   if (lhsNeg != rhsNeg)
296     return lhsNeg ? -1 : 1;
297 
298   // Otherwise we can just use an unsigned comparison, because even negative
299   // numbers compare correctly this way if both have the same signed-ness.
300   return tcCompare(U.pVal, RHS.U.pVal, getNumWords());
301 }
302 
303 void APInt::setBitsSlowCase(unsigned loBit, unsigned hiBit) {
304   unsigned loWord = whichWord(loBit);
305   unsigned hiWord = whichWord(hiBit);
306 
307   // Create an initial mask for the low word with zeros below loBit.
308   uint64_t loMask = WORDTYPE_MAX << whichBit(loBit);
309 
310   // If hiBit is not aligned, we need a high mask.
311   unsigned hiShiftAmt = whichBit(hiBit);
312   if (hiShiftAmt != 0) {
313     // Create a high mask with zeros above hiBit.
314     uint64_t hiMask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - hiShiftAmt);
315     // If loWord and hiWord are equal, then we combine the masks. Otherwise,
316     // set the bits in hiWord.
317     if (hiWord == loWord)
318       loMask &= hiMask;
319     else
320       U.pVal[hiWord] |= hiMask;
321   }
322   // Apply the mask to the low word.
323   U.pVal[loWord] |= loMask;
324 
325   // Fill any words between loWord and hiWord with all ones.
326   for (unsigned word = loWord + 1; word < hiWord; ++word)
327     U.pVal[word] = WORDTYPE_MAX;
328 }
329 
330 /// Toggle every bit to its opposite value.
331 void APInt::flipAllBitsSlowCase() {
332   tcComplement(U.pVal, getNumWords());
333   clearUnusedBits();
334 }
335 
336 /// Toggle a given bit to its opposite value whose position is given
337 /// as "bitPosition".
338 /// Toggles a given bit to its opposite value.
339 void APInt::flipBit(unsigned bitPosition) {
340   assert(bitPosition < BitWidth && "Out of the bit-width range!");
341   if ((*this)[bitPosition]) clearBit(bitPosition);
342   else setBit(bitPosition);
343 }
344 
345 void APInt::insertBits(const APInt &subBits, unsigned bitPosition) {
346   unsigned subBitWidth = subBits.getBitWidth();
347   assert(0 < subBitWidth && (subBitWidth + bitPosition) <= BitWidth &&
348          "Illegal bit insertion");
349 
350   // Insertion is a direct copy.
351   if (subBitWidth == BitWidth) {
352     *this = subBits;
353     return;
354   }
355 
356   // Single word result can be done as a direct bitmask.
357   if (isSingleWord()) {
358     uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - subBitWidth);
359     U.VAL &= ~(mask << bitPosition);
360     U.VAL |= (subBits.U.VAL << bitPosition);
361     return;
362   }
363 
364   unsigned loBit = whichBit(bitPosition);
365   unsigned loWord = whichWord(bitPosition);
366   unsigned hi1Word = whichWord(bitPosition + subBitWidth - 1);
367 
368   // Insertion within a single word can be done as a direct bitmask.
369   if (loWord == hi1Word) {
370     uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - subBitWidth);
371     U.pVal[loWord] &= ~(mask << loBit);
372     U.pVal[loWord] |= (subBits.U.VAL << loBit);
373     return;
374   }
375 
376   // Insert on word boundaries.
377   if (loBit == 0) {
378     // Direct copy whole words.
379     unsigned numWholeSubWords = subBitWidth / APINT_BITS_PER_WORD;
380     memcpy(U.pVal + loWord, subBits.getRawData(),
381            numWholeSubWords * APINT_WORD_SIZE);
382 
383     // Mask+insert remaining bits.
384     unsigned remainingBits = subBitWidth % APINT_BITS_PER_WORD;
385     if (remainingBits != 0) {
386       uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - remainingBits);
387       U.pVal[hi1Word] &= ~mask;
388       U.pVal[hi1Word] |= subBits.getWord(subBitWidth - 1);
389     }
390     return;
391   }
392 
393   // General case - set/clear individual bits in dst based on src.
394   // TODO - there is scope for optimization here, but at the moment this code
395   // path is barely used so prefer readability over performance.
396   for (unsigned i = 0; i != subBitWidth; ++i) {
397     if (subBits[i])
398       setBit(bitPosition + i);
399     else
400       clearBit(bitPosition + i);
401   }
402 }
403 
404 APInt APInt::extractBits(unsigned numBits, unsigned bitPosition) const {
405   assert(numBits > 0 && "Can't extract zero bits");
406   assert(bitPosition < BitWidth && (numBits + bitPosition) <= BitWidth &&
407          "Illegal bit extraction");
408 
409   if (isSingleWord())
410     return APInt(numBits, U.VAL >> bitPosition);
411 
412   unsigned loBit = whichBit(bitPosition);
413   unsigned loWord = whichWord(bitPosition);
414   unsigned hiWord = whichWord(bitPosition + numBits - 1);
415 
416   // Single word result extracting bits from a single word source.
417   if (loWord == hiWord)
418     return APInt(numBits, U.pVal[loWord] >> loBit);
419 
420   // Extracting bits that start on a source word boundary can be done
421   // as a fast memory copy.
422   if (loBit == 0)
423     return APInt(numBits, makeArrayRef(U.pVal + loWord, 1 + hiWord - loWord));
424 
425   // General case - shift + copy source words directly into place.
426   APInt Result(numBits, 0);
427   unsigned NumSrcWords = getNumWords();
428   unsigned NumDstWords = Result.getNumWords();
429 
430   uint64_t *DestPtr = Result.isSingleWord() ? &Result.U.VAL : Result.U.pVal;
431   for (unsigned word = 0; word < NumDstWords; ++word) {
432     uint64_t w0 = U.pVal[loWord + word];
433     uint64_t w1 =
434         (loWord + word + 1) < NumSrcWords ? U.pVal[loWord + word + 1] : 0;
435     DestPtr[word] = (w0 >> loBit) | (w1 << (APINT_BITS_PER_WORD - loBit));
436   }
437 
438   return Result.clearUnusedBits();
439 }
440 
441 unsigned APInt::getBitsNeeded(StringRef str, uint8_t radix) {
442   assert(!str.empty() && "Invalid string length");
443   assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 ||
444           radix == 36) &&
445          "Radix should be 2, 8, 10, 16, or 36!");
446 
447   size_t slen = str.size();
448 
449   // Each computation below needs to know if it's negative.
450   StringRef::iterator p = str.begin();
451   unsigned isNegative = *p == '-';
452   if (*p == '-' || *p == '+') {
453     p++;
454     slen--;
455     assert(slen && "String is only a sign, needs a value.");
456   }
457 
458   // For radixes of power-of-two values, the bits required is accurately and
459   // easily computed
460   if (radix == 2)
461     return slen + isNegative;
462   if (radix == 8)
463     return slen * 3 + isNegative;
464   if (radix == 16)
465     return slen * 4 + isNegative;
466 
467   // FIXME: base 36
468 
469   // This is grossly inefficient but accurate. We could probably do something
470   // with a computation of roughly slen*64/20 and then adjust by the value of
471   // the first few digits. But, I'm not sure how accurate that could be.
472 
473   // Compute a sufficient number of bits that is always large enough but might
474   // be too large. This avoids the assertion in the constructor. This
475   // calculation doesn't work appropriately for the numbers 0-9, so just use 4
476   // bits in that case.
477   unsigned sufficient
478     = radix == 10? (slen == 1 ? 4 : slen * 64/18)
479                  : (slen == 1 ? 7 : slen * 16/3);
480 
481   // Convert to the actual binary value.
482   APInt tmp(sufficient, StringRef(p, slen), radix);
483 
484   // Compute how many bits are required. If the log is infinite, assume we need
485   // just bit.
486   unsigned log = tmp.logBase2();
487   if (log == (unsigned)-1) {
488     return isNegative + 1;
489   } else {
490     return isNegative + log + 1;
491   }
492 }
493 
494 hash_code llvm::hash_value(const APInt &Arg) {
495   if (Arg.isSingleWord())
496     return hash_combine(Arg.U.VAL);
497 
498   return hash_combine_range(Arg.U.pVal, Arg.U.pVal + Arg.getNumWords());
499 }
500 
501 bool APInt::isSplat(unsigned SplatSizeInBits) const {
502   assert(getBitWidth() % SplatSizeInBits == 0 &&
503          "SplatSizeInBits must divide width!");
504   // We can check that all parts of an integer are equal by making use of a
505   // little trick: rotate and check if it's still the same value.
506   return *this == rotl(SplatSizeInBits);
507 }
508 
509 /// This function returns the high "numBits" bits of this APInt.
510 APInt APInt::getHiBits(unsigned numBits) const {
511   return this->lshr(BitWidth - numBits);
512 }
513 
514 /// This function returns the low "numBits" bits of this APInt.
515 APInt APInt::getLoBits(unsigned numBits) const {
516   APInt Result(getLowBitsSet(BitWidth, numBits));
517   Result &= *this;
518   return Result;
519 }
520 
521 /// Return a value containing V broadcasted over NewLen bits.
522 APInt APInt::getSplat(unsigned NewLen, const APInt &V) {
523   assert(NewLen >= V.getBitWidth() && "Can't splat to smaller bit width!");
524 
525   APInt Val = V.zextOrSelf(NewLen);
526   for (unsigned I = V.getBitWidth(); I < NewLen; I <<= 1)
527     Val |= Val << I;
528 
529   return Val;
530 }
531 
532 unsigned APInt::countLeadingZerosSlowCase() const {
533   unsigned Count = 0;
534   for (int i = getNumWords()-1; i >= 0; --i) {
535     uint64_t V = U.pVal[i];
536     if (V == 0)
537       Count += APINT_BITS_PER_WORD;
538     else {
539       Count += llvm::countLeadingZeros(V);
540       break;
541     }
542   }
543   // Adjust for unused bits in the most significant word (they are zero).
544   unsigned Mod = BitWidth % APINT_BITS_PER_WORD;
545   Count -= Mod > 0 ? APINT_BITS_PER_WORD - Mod : 0;
546   return Count;
547 }
548 
549 unsigned APInt::countLeadingOnesSlowCase() const {
550   unsigned highWordBits = BitWidth % APINT_BITS_PER_WORD;
551   unsigned shift;
552   if (!highWordBits) {
553     highWordBits = APINT_BITS_PER_WORD;
554     shift = 0;
555   } else {
556     shift = APINT_BITS_PER_WORD - highWordBits;
557   }
558   int i = getNumWords() - 1;
559   unsigned Count = llvm::countLeadingOnes(U.pVal[i] << shift);
560   if (Count == highWordBits) {
561     for (i--; i >= 0; --i) {
562       if (U.pVal[i] == WORDTYPE_MAX)
563         Count += APINT_BITS_PER_WORD;
564       else {
565         Count += llvm::countLeadingOnes(U.pVal[i]);
566         break;
567       }
568     }
569   }
570   return Count;
571 }
572 
573 unsigned APInt::countTrailingZerosSlowCase() const {
574   unsigned Count = 0;
575   unsigned i = 0;
576   for (; i < getNumWords() && U.pVal[i] == 0; ++i)
577     Count += APINT_BITS_PER_WORD;
578   if (i < getNumWords())
579     Count += llvm::countTrailingZeros(U.pVal[i]);
580   return std::min(Count, BitWidth);
581 }
582 
583 unsigned APInt::countTrailingOnesSlowCase() const {
584   unsigned Count = 0;
585   unsigned i = 0;
586   for (; i < getNumWords() && U.pVal[i] == WORDTYPE_MAX; ++i)
587     Count += APINT_BITS_PER_WORD;
588   if (i < getNumWords())
589     Count += llvm::countTrailingOnes(U.pVal[i]);
590   assert(Count <= BitWidth);
591   return Count;
592 }
593 
594 unsigned APInt::countPopulationSlowCase() const {
595   unsigned Count = 0;
596   for (unsigned i = 0; i < getNumWords(); ++i)
597     Count += llvm::countPopulation(U.pVal[i]);
598   return Count;
599 }
600 
601 bool APInt::intersectsSlowCase(const APInt &RHS) const {
602   for (unsigned i = 0, e = getNumWords(); i != e; ++i)
603     if ((U.pVal[i] & RHS.U.pVal[i]) != 0)
604       return true;
605 
606   return false;
607 }
608 
609 bool APInt::isSubsetOfSlowCase(const APInt &RHS) const {
610   for (unsigned i = 0, e = getNumWords(); i != e; ++i)
611     if ((U.pVal[i] & ~RHS.U.pVal[i]) != 0)
612       return false;
613 
614   return true;
615 }
616 
617 APInt APInt::byteSwap() const {
618   assert(BitWidth >= 16 && BitWidth % 16 == 0 && "Cannot byteswap!");
619   if (BitWidth == 16)
620     return APInt(BitWidth, ByteSwap_16(uint16_t(U.VAL)));
621   if (BitWidth == 32)
622     return APInt(BitWidth, ByteSwap_32(unsigned(U.VAL)));
623   if (BitWidth == 48) {
624     unsigned Tmp1 = unsigned(U.VAL >> 16);
625     Tmp1 = ByteSwap_32(Tmp1);
626     uint16_t Tmp2 = uint16_t(U.VAL);
627     Tmp2 = ByteSwap_16(Tmp2);
628     return APInt(BitWidth, (uint64_t(Tmp2) << 32) | Tmp1);
629   }
630   if (BitWidth == 64)
631     return APInt(BitWidth, ByteSwap_64(U.VAL));
632 
633   APInt Result(getNumWords() * APINT_BITS_PER_WORD, 0);
634   for (unsigned I = 0, N = getNumWords(); I != N; ++I)
635     Result.U.pVal[I] = ByteSwap_64(U.pVal[N - I - 1]);
636   if (Result.BitWidth != BitWidth) {
637     Result.lshrInPlace(Result.BitWidth - BitWidth);
638     Result.BitWidth = BitWidth;
639   }
640   return Result;
641 }
642 
643 APInt APInt::reverseBits() const {
644   switch (BitWidth) {
645   case 64:
646     return APInt(BitWidth, llvm::reverseBits<uint64_t>(U.VAL));
647   case 32:
648     return APInt(BitWidth, llvm::reverseBits<uint32_t>(U.VAL));
649   case 16:
650     return APInt(BitWidth, llvm::reverseBits<uint16_t>(U.VAL));
651   case 8:
652     return APInt(BitWidth, llvm::reverseBits<uint8_t>(U.VAL));
653   default:
654     break;
655   }
656 
657   APInt Val(*this);
658   APInt Reversed(BitWidth, 0);
659   unsigned S = BitWidth;
660 
661   for (; Val != 0; Val.lshrInPlace(1)) {
662     Reversed <<= 1;
663     Reversed |= Val[0];
664     --S;
665   }
666 
667   Reversed <<= S;
668   return Reversed;
669 }
670 
671 APInt llvm::APIntOps::GreatestCommonDivisor(APInt A, APInt B) {
672   // Fast-path a common case.
673   if (A == B) return A;
674 
675   // Corner cases: if either operand is zero, the other is the gcd.
676   if (!A) return B;
677   if (!B) return A;
678 
679   // Count common powers of 2 and remove all other powers of 2.
680   unsigned Pow2;
681   {
682     unsigned Pow2_A = A.countTrailingZeros();
683     unsigned Pow2_B = B.countTrailingZeros();
684     if (Pow2_A > Pow2_B) {
685       A.lshrInPlace(Pow2_A - Pow2_B);
686       Pow2 = Pow2_B;
687     } else if (Pow2_B > Pow2_A) {
688       B.lshrInPlace(Pow2_B - Pow2_A);
689       Pow2 = Pow2_A;
690     } else {
691       Pow2 = Pow2_A;
692     }
693   }
694 
695   // Both operands are odd multiples of 2^Pow_2:
696   //
697   //   gcd(a, b) = gcd(|a - b| / 2^i, min(a, b))
698   //
699   // This is a modified version of Stein's algorithm, taking advantage of
700   // efficient countTrailingZeros().
701   while (A != B) {
702     if (A.ugt(B)) {
703       A -= B;
704       A.lshrInPlace(A.countTrailingZeros() - Pow2);
705     } else {
706       B -= A;
707       B.lshrInPlace(B.countTrailingZeros() - Pow2);
708     }
709   }
710 
711   return A;
712 }
713 
714 APInt llvm::APIntOps::RoundDoubleToAPInt(double Double, unsigned width) {
715   uint64_t I = bit_cast<uint64_t>(Double);
716 
717   // Get the sign bit from the highest order bit
718   bool isNeg = I >> 63;
719 
720   // Get the 11-bit exponent and adjust for the 1023 bit bias
721   int64_t exp = ((I >> 52) & 0x7ff) - 1023;
722 
723   // If the exponent is negative, the value is < 0 so just return 0.
724   if (exp < 0)
725     return APInt(width, 0u);
726 
727   // Extract the mantissa by clearing the top 12 bits (sign + exponent).
728   uint64_t mantissa = (I & (~0ULL >> 12)) | 1ULL << 52;
729 
730   // If the exponent doesn't shift all bits out of the mantissa
731   if (exp < 52)
732     return isNeg ? -APInt(width, mantissa >> (52 - exp)) :
733                     APInt(width, mantissa >> (52 - exp));
734 
735   // If the client didn't provide enough bits for us to shift the mantissa into
736   // then the result is undefined, just return 0
737   if (width <= exp - 52)
738     return APInt(width, 0);
739 
740   // Otherwise, we have to shift the mantissa bits up to the right location
741   APInt Tmp(width, mantissa);
742   Tmp <<= (unsigned)exp - 52;
743   return isNeg ? -Tmp : Tmp;
744 }
745 
746 /// This function converts this APInt to a double.
747 /// The layout for double is as following (IEEE Standard 754):
748 ///  --------------------------------------
749 /// |  Sign    Exponent    Fraction    Bias |
750 /// |-------------------------------------- |
751 /// |  1[63]   11[62-52]   52[51-00]   1023 |
752 ///  --------------------------------------
753 double APInt::roundToDouble(bool isSigned) const {
754 
755   // Handle the simple case where the value is contained in one uint64_t.
756   // It is wrong to optimize getWord(0) to VAL; there might be more than one word.
757   if (isSingleWord() || getActiveBits() <= APINT_BITS_PER_WORD) {
758     if (isSigned) {
759       int64_t sext = SignExtend64(getWord(0), BitWidth);
760       return double(sext);
761     } else
762       return double(getWord(0));
763   }
764 
765   // Determine if the value is negative.
766   bool isNeg = isSigned ? (*this)[BitWidth-1] : false;
767 
768   // Construct the absolute value if we're negative.
769   APInt Tmp(isNeg ? -(*this) : (*this));
770 
771   // Figure out how many bits we're using.
772   unsigned n = Tmp.getActiveBits();
773 
774   // The exponent (without bias normalization) is just the number of bits
775   // we are using. Note that the sign bit is gone since we constructed the
776   // absolute value.
777   uint64_t exp = n;
778 
779   // Return infinity for exponent overflow
780   if (exp > 1023) {
781     if (!isSigned || !isNeg)
782       return std::numeric_limits<double>::infinity();
783     else
784       return -std::numeric_limits<double>::infinity();
785   }
786   exp += 1023; // Increment for 1023 bias
787 
788   // Number of bits in mantissa is 52. To obtain the mantissa value, we must
789   // extract the high 52 bits from the correct words in pVal.
790   uint64_t mantissa;
791   unsigned hiWord = whichWord(n-1);
792   if (hiWord == 0) {
793     mantissa = Tmp.U.pVal[0];
794     if (n > 52)
795       mantissa >>= n - 52; // shift down, we want the top 52 bits.
796   } else {
797     assert(hiWord > 0 && "huh?");
798     uint64_t hibits = Tmp.U.pVal[hiWord] << (52 - n % APINT_BITS_PER_WORD);
799     uint64_t lobits = Tmp.U.pVal[hiWord-1] >> (11 + n % APINT_BITS_PER_WORD);
800     mantissa = hibits | lobits;
801   }
802 
803   // The leading bit of mantissa is implicit, so get rid of it.
804   uint64_t sign = isNeg ? (1ULL << (APINT_BITS_PER_WORD - 1)) : 0;
805   uint64_t I = sign | (exp << 52) | mantissa;
806   return bit_cast<double>(I);
807 }
808 
809 // Truncate to new width.
810 APInt APInt::trunc(unsigned width) const {
811   assert(width < BitWidth && "Invalid APInt Truncate request");
812   assert(width && "Can't truncate to 0 bits");
813 
814   if (width <= APINT_BITS_PER_WORD)
815     return APInt(width, getRawData()[0]);
816 
817   APInt Result(getMemory(getNumWords(width)), width);
818 
819   // Copy full words.
820   unsigned i;
821   for (i = 0; i != width / APINT_BITS_PER_WORD; i++)
822     Result.U.pVal[i] = U.pVal[i];
823 
824   // Truncate and copy any partial word.
825   unsigned bits = (0 - width) % APINT_BITS_PER_WORD;
826   if (bits != 0)
827     Result.U.pVal[i] = U.pVal[i] << bits >> bits;
828 
829   return Result;
830 }
831 
832 // Sign extend to a new width.
833 APInt APInt::sext(unsigned Width) const {
834   assert(Width > BitWidth && "Invalid APInt SignExtend request");
835 
836   if (Width <= APINT_BITS_PER_WORD)
837     return APInt(Width, SignExtend64(U.VAL, BitWidth));
838 
839   APInt Result(getMemory(getNumWords(Width)), Width);
840 
841   // Copy words.
842   std::memcpy(Result.U.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE);
843 
844   // Sign extend the last word since there may be unused bits in the input.
845   Result.U.pVal[getNumWords() - 1] =
846       SignExtend64(Result.U.pVal[getNumWords() - 1],
847                    ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1);
848 
849   // Fill with sign bits.
850   std::memset(Result.U.pVal + getNumWords(), isNegative() ? -1 : 0,
851               (Result.getNumWords() - getNumWords()) * APINT_WORD_SIZE);
852   Result.clearUnusedBits();
853   return Result;
854 }
855 
856 //  Zero extend to a new width.
857 APInt APInt::zext(unsigned width) const {
858   assert(width > BitWidth && "Invalid APInt ZeroExtend request");
859 
860   if (width <= APINT_BITS_PER_WORD)
861     return APInt(width, U.VAL);
862 
863   APInt Result(getMemory(getNumWords(width)), width);
864 
865   // Copy words.
866   std::memcpy(Result.U.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE);
867 
868   // Zero remaining words.
869   std::memset(Result.U.pVal + getNumWords(), 0,
870               (Result.getNumWords() - getNumWords()) * APINT_WORD_SIZE);
871 
872   return Result;
873 }
874 
875 APInt APInt::zextOrTrunc(unsigned width) const {
876   if (BitWidth < width)
877     return zext(width);
878   if (BitWidth > width)
879     return trunc(width);
880   return *this;
881 }
882 
883 APInt APInt::sextOrTrunc(unsigned width) const {
884   if (BitWidth < width)
885     return sext(width);
886   if (BitWidth > width)
887     return trunc(width);
888   return *this;
889 }
890 
891 APInt APInt::zextOrSelf(unsigned width) const {
892   if (BitWidth < width)
893     return zext(width);
894   return *this;
895 }
896 
897 APInt APInt::sextOrSelf(unsigned width) const {
898   if (BitWidth < width)
899     return sext(width);
900   return *this;
901 }
902 
903 /// Arithmetic right-shift this APInt by shiftAmt.
904 /// Arithmetic right-shift function.
905 void APInt::ashrInPlace(const APInt &shiftAmt) {
906   ashrInPlace((unsigned)shiftAmt.getLimitedValue(BitWidth));
907 }
908 
909 /// Arithmetic right-shift this APInt by shiftAmt.
910 /// Arithmetic right-shift function.
911 void APInt::ashrSlowCase(unsigned ShiftAmt) {
912   // Don't bother performing a no-op shift.
913   if (!ShiftAmt)
914     return;
915 
916   // Save the original sign bit for later.
917   bool Negative = isNegative();
918 
919   // WordShift is the inter-part shift; BitShift is intra-part shift.
920   unsigned WordShift = ShiftAmt / APINT_BITS_PER_WORD;
921   unsigned BitShift = ShiftAmt % APINT_BITS_PER_WORD;
922 
923   unsigned WordsToMove = getNumWords() - WordShift;
924   if (WordsToMove != 0) {
925     // Sign extend the last word to fill in the unused bits.
926     U.pVal[getNumWords() - 1] = SignExtend64(
927         U.pVal[getNumWords() - 1], ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1);
928 
929     // Fastpath for moving by whole words.
930     if (BitShift == 0) {
931       std::memmove(U.pVal, U.pVal + WordShift, WordsToMove * APINT_WORD_SIZE);
932     } else {
933       // Move the words containing significant bits.
934       for (unsigned i = 0; i != WordsToMove - 1; ++i)
935         U.pVal[i] = (U.pVal[i + WordShift] >> BitShift) |
936                     (U.pVal[i + WordShift + 1] << (APINT_BITS_PER_WORD - BitShift));
937 
938       // Handle the last word which has no high bits to copy.
939       U.pVal[WordsToMove - 1] = U.pVal[WordShift + WordsToMove - 1] >> BitShift;
940       // Sign extend one more time.
941       U.pVal[WordsToMove - 1] =
942           SignExtend64(U.pVal[WordsToMove - 1], APINT_BITS_PER_WORD - BitShift);
943     }
944   }
945 
946   // Fill in the remainder based on the original sign.
947   std::memset(U.pVal + WordsToMove, Negative ? -1 : 0,
948               WordShift * APINT_WORD_SIZE);
949   clearUnusedBits();
950 }
951 
952 /// Logical right-shift this APInt by shiftAmt.
953 /// Logical right-shift function.
954 void APInt::lshrInPlace(const APInt &shiftAmt) {
955   lshrInPlace((unsigned)shiftAmt.getLimitedValue(BitWidth));
956 }
957 
958 /// Logical right-shift this APInt by shiftAmt.
959 /// Logical right-shift function.
960 void APInt::lshrSlowCase(unsigned ShiftAmt) {
961   tcShiftRight(U.pVal, getNumWords(), ShiftAmt);
962 }
963 
964 /// Left-shift this APInt by shiftAmt.
965 /// Left-shift function.
966 APInt &APInt::operator<<=(const APInt &shiftAmt) {
967   // It's undefined behavior in C to shift by BitWidth or greater.
968   *this <<= (unsigned)shiftAmt.getLimitedValue(BitWidth);
969   return *this;
970 }
971 
972 void APInt::shlSlowCase(unsigned ShiftAmt) {
973   tcShiftLeft(U.pVal, getNumWords(), ShiftAmt);
974   clearUnusedBits();
975 }
976 
977 // Calculate the rotate amount modulo the bit width.
978 static unsigned rotateModulo(unsigned BitWidth, const APInt &rotateAmt) {
979   unsigned rotBitWidth = rotateAmt.getBitWidth();
980   APInt rot = rotateAmt;
981   if (rotBitWidth < BitWidth) {
982     // Extend the rotate APInt, so that the urem doesn't divide by 0.
983     // e.g. APInt(1, 32) would give APInt(1, 0).
984     rot = rotateAmt.zext(BitWidth);
985   }
986   rot = rot.urem(APInt(rot.getBitWidth(), BitWidth));
987   return rot.getLimitedValue(BitWidth);
988 }
989 
990 APInt APInt::rotl(const APInt &rotateAmt) const {
991   return rotl(rotateModulo(BitWidth, rotateAmt));
992 }
993 
994 APInt APInt::rotl(unsigned rotateAmt) const {
995   rotateAmt %= BitWidth;
996   if (rotateAmt == 0)
997     return *this;
998   return shl(rotateAmt) | lshr(BitWidth - rotateAmt);
999 }
1000 
1001 APInt APInt::rotr(const APInt &rotateAmt) const {
1002   return rotr(rotateModulo(BitWidth, rotateAmt));
1003 }
1004 
1005 APInt APInt::rotr(unsigned rotateAmt) const {
1006   rotateAmt %= BitWidth;
1007   if (rotateAmt == 0)
1008     return *this;
1009   return lshr(rotateAmt) | shl(BitWidth - rotateAmt);
1010 }
1011 
1012 // Square Root - this method computes and returns the square root of "this".
1013 // Three mechanisms are used for computation. For small values (<= 5 bits),
1014 // a table lookup is done. This gets some performance for common cases. For
1015 // values using less than 52 bits, the value is converted to double and then
1016 // the libc sqrt function is called. The result is rounded and then converted
1017 // back to a uint64_t which is then used to construct the result. Finally,
1018 // the Babylonian method for computing square roots is used.
1019 APInt APInt::sqrt() const {
1020 
1021   // Determine the magnitude of the value.
1022   unsigned magnitude = getActiveBits();
1023 
1024   // Use a fast table for some small values. This also gets rid of some
1025   // rounding errors in libc sqrt for small values.
1026   if (magnitude <= 5) {
1027     static const uint8_t results[32] = {
1028       /*     0 */ 0,
1029       /*  1- 2 */ 1, 1,
1030       /*  3- 6 */ 2, 2, 2, 2,
1031       /*  7-12 */ 3, 3, 3, 3, 3, 3,
1032       /* 13-20 */ 4, 4, 4, 4, 4, 4, 4, 4,
1033       /* 21-30 */ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1034       /*    31 */ 6
1035     };
1036     return APInt(BitWidth, results[ (isSingleWord() ? U.VAL : U.pVal[0]) ]);
1037   }
1038 
1039   // If the magnitude of the value fits in less than 52 bits (the precision of
1040   // an IEEE double precision floating point value), then we can use the
1041   // libc sqrt function which will probably use a hardware sqrt computation.
1042   // This should be faster than the algorithm below.
1043   if (magnitude < 52) {
1044     return APInt(BitWidth,
1045                  uint64_t(::round(::sqrt(double(isSingleWord() ? U.VAL
1046                                                                : U.pVal[0])))));
1047   }
1048 
1049   // Okay, all the short cuts are exhausted. We must compute it. The following
1050   // is a classical Babylonian method for computing the square root. This code
1051   // was adapted to APInt from a wikipedia article on such computations.
1052   // See http://www.wikipedia.org/ and go to the page named
1053   // Calculate_an_integer_square_root.
1054   unsigned nbits = BitWidth, i = 4;
1055   APInt testy(BitWidth, 16);
1056   APInt x_old(BitWidth, 1);
1057   APInt x_new(BitWidth, 0);
1058   APInt two(BitWidth, 2);
1059 
1060   // Select a good starting value using binary logarithms.
1061   for (;; i += 2, testy = testy.shl(2))
1062     if (i >= nbits || this->ule(testy)) {
1063       x_old = x_old.shl(i / 2);
1064       break;
1065     }
1066 
1067   // Use the Babylonian method to arrive at the integer square root:
1068   for (;;) {
1069     x_new = (this->udiv(x_old) + x_old).udiv(two);
1070     if (x_old.ule(x_new))
1071       break;
1072     x_old = x_new;
1073   }
1074 
1075   // Make sure we return the closest approximation
1076   // NOTE: The rounding calculation below is correct. It will produce an
1077   // off-by-one discrepancy with results from pari/gp. That discrepancy has been
1078   // determined to be a rounding issue with pari/gp as it begins to use a
1079   // floating point representation after 192 bits. There are no discrepancies
1080   // between this algorithm and pari/gp for bit widths < 192 bits.
1081   APInt square(x_old * x_old);
1082   APInt nextSquare((x_old + 1) * (x_old +1));
1083   if (this->ult(square))
1084     return x_old;
1085   assert(this->ule(nextSquare) && "Error in APInt::sqrt computation");
1086   APInt midpoint((nextSquare - square).udiv(two));
1087   APInt offset(*this - square);
1088   if (offset.ult(midpoint))
1089     return x_old;
1090   return x_old + 1;
1091 }
1092 
1093 /// Computes the multiplicative inverse of this APInt for a given modulo. The
1094 /// iterative extended Euclidean algorithm is used to solve for this value,
1095 /// however we simplify it to speed up calculating only the inverse, and take
1096 /// advantage of div+rem calculations. We also use some tricks to avoid copying
1097 /// (potentially large) APInts around.
1098 APInt APInt::multiplicativeInverse(const APInt& modulo) const {
1099   assert(ult(modulo) && "This APInt must be smaller than the modulo");
1100 
1101   // Using the properties listed at the following web page (accessed 06/21/08):
1102   //   http://www.numbertheory.org/php/euclid.html
1103   // (especially the properties numbered 3, 4 and 9) it can be proved that
1104   // BitWidth bits suffice for all the computations in the algorithm implemented
1105   // below. More precisely, this number of bits suffice if the multiplicative
1106   // inverse exists, but may not suffice for the general extended Euclidean
1107   // algorithm.
1108 
1109   APInt r[2] = { modulo, *this };
1110   APInt t[2] = { APInt(BitWidth, 0), APInt(BitWidth, 1) };
1111   APInt q(BitWidth, 0);
1112 
1113   unsigned i;
1114   for (i = 0; r[i^1] != 0; i ^= 1) {
1115     // An overview of the math without the confusing bit-flipping:
1116     // q = r[i-2] / r[i-1]
1117     // r[i] = r[i-2] % r[i-1]
1118     // t[i] = t[i-2] - t[i-1] * q
1119     udivrem(r[i], r[i^1], q, r[i]);
1120     t[i] -= t[i^1] * q;
1121   }
1122 
1123   // If this APInt and the modulo are not coprime, there is no multiplicative
1124   // inverse, so return 0. We check this by looking at the next-to-last
1125   // remainder, which is the gcd(*this,modulo) as calculated by the Euclidean
1126   // algorithm.
1127   if (r[i] != 1)
1128     return APInt(BitWidth, 0);
1129 
1130   // The next-to-last t is the multiplicative inverse.  However, we are
1131   // interested in a positive inverse. Calculate a positive one from a negative
1132   // one if necessary. A simple addition of the modulo suffices because
1133   // abs(t[i]) is known to be less than *this/2 (see the link above).
1134   if (t[i].isNegative())
1135     t[i] += modulo;
1136 
1137   return std::move(t[i]);
1138 }
1139 
1140 /// Calculate the magic numbers required to implement a signed integer division
1141 /// by a constant as a sequence of multiplies, adds and shifts.  Requires that
1142 /// the divisor not be 0, 1, or -1.  Taken from "Hacker's Delight", Henry S.
1143 /// Warren, Jr., chapter 10.
1144 APInt::ms APInt::magic() const {
1145   const APInt& d = *this;
1146   unsigned p;
1147   APInt ad, anc, delta, q1, r1, q2, r2, t;
1148   APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
1149   struct ms mag;
1150 
1151   ad = d.abs();
1152   t = signedMin + (d.lshr(d.getBitWidth() - 1));
1153   anc = t - 1 - t.urem(ad);   // absolute value of nc
1154   p = d.getBitWidth() - 1;    // initialize p
1155   q1 = signedMin.udiv(anc);   // initialize q1 = 2p/abs(nc)
1156   r1 = signedMin - q1*anc;    // initialize r1 = rem(2p,abs(nc))
1157   q2 = signedMin.udiv(ad);    // initialize q2 = 2p/abs(d)
1158   r2 = signedMin - q2*ad;     // initialize r2 = rem(2p,abs(d))
1159   do {
1160     p = p + 1;
1161     q1 = q1<<1;          // update q1 = 2p/abs(nc)
1162     r1 = r1<<1;          // update r1 = rem(2p/abs(nc))
1163     if (r1.uge(anc)) {  // must be unsigned comparison
1164       q1 = q1 + 1;
1165       r1 = r1 - anc;
1166     }
1167     q2 = q2<<1;          // update q2 = 2p/abs(d)
1168     r2 = r2<<1;          // update r2 = rem(2p/abs(d))
1169     if (r2.uge(ad)) {   // must be unsigned comparison
1170       q2 = q2 + 1;
1171       r2 = r2 - ad;
1172     }
1173     delta = ad - r2;
1174   } while (q1.ult(delta) || (q1 == delta && r1 == 0));
1175 
1176   mag.m = q2 + 1;
1177   if (d.isNegative()) mag.m = -mag.m;   // resulting magic number
1178   mag.s = p - d.getBitWidth();          // resulting shift
1179   return mag;
1180 }
1181 
1182 /// Calculate the magic numbers required to implement an unsigned integer
1183 /// division by a constant as a sequence of multiplies, adds and shifts.
1184 /// Requires that the divisor not be 0.  Taken from "Hacker's Delight", Henry
1185 /// S. Warren, Jr., chapter 10.
1186 /// LeadingZeros can be used to simplify the calculation if the upper bits
1187 /// of the divided value are known zero.
1188 APInt::mu APInt::magicu(unsigned LeadingZeros) const {
1189   const APInt& d = *this;
1190   unsigned p;
1191   APInt nc, delta, q1, r1, q2, r2;
1192   struct mu magu;
1193   magu.a = 0;               // initialize "add" indicator
1194   APInt allOnes = APInt::getAllOnesValue(d.getBitWidth()).lshr(LeadingZeros);
1195   APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
1196   APInt signedMax = APInt::getSignedMaxValue(d.getBitWidth());
1197 
1198   nc = allOnes - (allOnes - d).urem(d);
1199   p = d.getBitWidth() - 1;  // initialize p
1200   q1 = signedMin.udiv(nc);  // initialize q1 = 2p/nc
1201   r1 = signedMin - q1*nc;   // initialize r1 = rem(2p,nc)
1202   q2 = signedMax.udiv(d);   // initialize q2 = (2p-1)/d
1203   r2 = signedMax - q2*d;    // initialize r2 = rem((2p-1),d)
1204   do {
1205     p = p + 1;
1206     if (r1.uge(nc - r1)) {
1207       q1 = q1 + q1 + 1;  // update q1
1208       r1 = r1 + r1 - nc; // update r1
1209     }
1210     else {
1211       q1 = q1+q1; // update q1
1212       r1 = r1+r1; // update r1
1213     }
1214     if ((r2 + 1).uge(d - r2)) {
1215       if (q2.uge(signedMax)) magu.a = 1;
1216       q2 = q2+q2 + 1;     // update q2
1217       r2 = r2+r2 + 1 - d; // update r2
1218     }
1219     else {
1220       if (q2.uge(signedMin)) magu.a = 1;
1221       q2 = q2+q2;     // update q2
1222       r2 = r2+r2 + 1; // update r2
1223     }
1224     delta = d - 1 - r2;
1225   } while (p < d.getBitWidth()*2 &&
1226            (q1.ult(delta) || (q1 == delta && r1 == 0)));
1227   magu.m = q2 + 1; // resulting magic number
1228   magu.s = p - d.getBitWidth();  // resulting shift
1229   return magu;
1230 }
1231 
1232 /// Implementation of Knuth's Algorithm D (Division of nonnegative integers)
1233 /// from "Art of Computer Programming, Volume 2", section 4.3.1, p. 272. The
1234 /// variables here have the same names as in the algorithm. Comments explain
1235 /// the algorithm and any deviation from it.
1236 static void KnuthDiv(uint32_t *u, uint32_t *v, uint32_t *q, uint32_t* r,
1237                      unsigned m, unsigned n) {
1238   assert(u && "Must provide dividend");
1239   assert(v && "Must provide divisor");
1240   assert(q && "Must provide quotient");
1241   assert(u != v && u != q && v != q && "Must use different memory");
1242   assert(n>1 && "n must be > 1");
1243 
1244   // b denotes the base of the number system. In our case b is 2^32.
1245   const uint64_t b = uint64_t(1) << 32;
1246 
1247 // The DEBUG macros here tend to be spam in the debug output if you're not
1248 // debugging this code. Disable them unless KNUTH_DEBUG is defined.
1249 #ifdef KNUTH_DEBUG
1250 #define DEBUG_KNUTH(X) LLVM_DEBUG(X)
1251 #else
1252 #define DEBUG_KNUTH(X) do {} while(false)
1253 #endif
1254 
1255   DEBUG_KNUTH(dbgs() << "KnuthDiv: m=" << m << " n=" << n << '\n');
1256   DEBUG_KNUTH(dbgs() << "KnuthDiv: original:");
1257   DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1258   DEBUG_KNUTH(dbgs() << " by");
1259   DEBUG_KNUTH(for (int i = n; i > 0; i--) dbgs() << " " << v[i - 1]);
1260   DEBUG_KNUTH(dbgs() << '\n');
1261   // D1. [Normalize.] Set d = b / (v[n-1] + 1) and multiply all the digits of
1262   // u and v by d. Note that we have taken Knuth's advice here to use a power
1263   // of 2 value for d such that d * v[n-1] >= b/2 (b is the base). A power of
1264   // 2 allows us to shift instead of multiply and it is easy to determine the
1265   // shift amount from the leading zeros.  We are basically normalizing the u
1266   // and v so that its high bits are shifted to the top of v's range without
1267   // overflow. Note that this can require an extra word in u so that u must
1268   // be of length m+n+1.
1269   unsigned shift = countLeadingZeros(v[n-1]);
1270   uint32_t v_carry = 0;
1271   uint32_t u_carry = 0;
1272   if (shift) {
1273     for (unsigned i = 0; i < m+n; ++i) {
1274       uint32_t u_tmp = u[i] >> (32 - shift);
1275       u[i] = (u[i] << shift) | u_carry;
1276       u_carry = u_tmp;
1277     }
1278     for (unsigned i = 0; i < n; ++i) {
1279       uint32_t v_tmp = v[i] >> (32 - shift);
1280       v[i] = (v[i] << shift) | v_carry;
1281       v_carry = v_tmp;
1282     }
1283   }
1284   u[m+n] = u_carry;
1285 
1286   DEBUG_KNUTH(dbgs() << "KnuthDiv:   normal:");
1287   DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1288   DEBUG_KNUTH(dbgs() << " by");
1289   DEBUG_KNUTH(for (int i = n; i > 0; i--) dbgs() << " " << v[i - 1]);
1290   DEBUG_KNUTH(dbgs() << '\n');
1291 
1292   // D2. [Initialize j.]  Set j to m. This is the loop counter over the places.
1293   int j = m;
1294   do {
1295     DEBUG_KNUTH(dbgs() << "KnuthDiv: quotient digit #" << j << '\n');
1296     // D3. [Calculate q'.].
1297     //     Set qp = (u[j+n]*b + u[j+n-1]) / v[n-1]. (qp=qprime=q')
1298     //     Set rp = (u[j+n]*b + u[j+n-1]) % v[n-1]. (rp=rprime=r')
1299     // Now test if qp == b or qp*v[n-2] > b*rp + u[j+n-2]; if so, decrease
1300     // qp by 1, increase rp by v[n-1], and repeat this test if rp < b. The test
1301     // on v[n-2] determines at high speed most of the cases in which the trial
1302     // value qp is one too large, and it eliminates all cases where qp is two
1303     // too large.
1304     uint64_t dividend = Make_64(u[j+n], u[j+n-1]);
1305     DEBUG_KNUTH(dbgs() << "KnuthDiv: dividend == " << dividend << '\n');
1306     uint64_t qp = dividend / v[n-1];
1307     uint64_t rp = dividend % v[n-1];
1308     if (qp == b || qp*v[n-2] > b*rp + u[j+n-2]) {
1309       qp--;
1310       rp += v[n-1];
1311       if (rp < b && (qp == b || qp*v[n-2] > b*rp + u[j+n-2]))
1312         qp--;
1313     }
1314     DEBUG_KNUTH(dbgs() << "KnuthDiv: qp == " << qp << ", rp == " << rp << '\n');
1315 
1316     // D4. [Multiply and subtract.] Replace (u[j+n]u[j+n-1]...u[j]) with
1317     // (u[j+n]u[j+n-1]..u[j]) - qp * (v[n-1]...v[1]v[0]). This computation
1318     // consists of a simple multiplication by a one-place number, combined with
1319     // a subtraction.
1320     // The digits (u[j+n]...u[j]) should be kept positive; if the result of
1321     // this step is actually negative, (u[j+n]...u[j]) should be left as the
1322     // true value plus b**(n+1), namely as the b's complement of
1323     // the true value, and a "borrow" to the left should be remembered.
1324     int64_t borrow = 0;
1325     for (unsigned i = 0; i < n; ++i) {
1326       uint64_t p = uint64_t(qp) * uint64_t(v[i]);
1327       int64_t subres = int64_t(u[j+i]) - borrow - Lo_32(p);
1328       u[j+i] = Lo_32(subres);
1329       borrow = Hi_32(p) - Hi_32(subres);
1330       DEBUG_KNUTH(dbgs() << "KnuthDiv: u[j+i] = " << u[j + i]
1331                         << ", borrow = " << borrow << '\n');
1332     }
1333     bool isNeg = u[j+n] < borrow;
1334     u[j+n] -= Lo_32(borrow);
1335 
1336     DEBUG_KNUTH(dbgs() << "KnuthDiv: after subtraction:");
1337     DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1338     DEBUG_KNUTH(dbgs() << '\n');
1339 
1340     // D5. [Test remainder.] Set q[j] = qp. If the result of step D4 was
1341     // negative, go to step D6; otherwise go on to step D7.
1342     q[j] = Lo_32(qp);
1343     if (isNeg) {
1344       // D6. [Add back]. The probability that this step is necessary is very
1345       // small, on the order of only 2/b. Make sure that test data accounts for
1346       // this possibility. Decrease q[j] by 1
1347       q[j]--;
1348       // and add (0v[n-1]...v[1]v[0]) to (u[j+n]u[j+n-1]...u[j+1]u[j]).
1349       // A carry will occur to the left of u[j+n], and it should be ignored
1350       // since it cancels with the borrow that occurred in D4.
1351       bool carry = false;
1352       for (unsigned i = 0; i < n; i++) {
1353         uint32_t limit = std::min(u[j+i],v[i]);
1354         u[j+i] += v[i] + carry;
1355         carry = u[j+i] < limit || (carry && u[j+i] == limit);
1356       }
1357       u[j+n] += carry;
1358     }
1359     DEBUG_KNUTH(dbgs() << "KnuthDiv: after correction:");
1360     DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1361     DEBUG_KNUTH(dbgs() << "\nKnuthDiv: digit result = " << q[j] << '\n');
1362 
1363     // D7. [Loop on j.]  Decrease j by one. Now if j >= 0, go back to D3.
1364   } while (--j >= 0);
1365 
1366   DEBUG_KNUTH(dbgs() << "KnuthDiv: quotient:");
1367   DEBUG_KNUTH(for (int i = m; i >= 0; i--) dbgs() << " " << q[i]);
1368   DEBUG_KNUTH(dbgs() << '\n');
1369 
1370   // D8. [Unnormalize]. Now q[...] is the desired quotient, and the desired
1371   // remainder may be obtained by dividing u[...] by d. If r is non-null we
1372   // compute the remainder (urem uses this).
1373   if (r) {
1374     // The value d is expressed by the "shift" value above since we avoided
1375     // multiplication by d by using a shift left. So, all we have to do is
1376     // shift right here.
1377     if (shift) {
1378       uint32_t carry = 0;
1379       DEBUG_KNUTH(dbgs() << "KnuthDiv: remainder:");
1380       for (int i = n-1; i >= 0; i--) {
1381         r[i] = (u[i] >> shift) | carry;
1382         carry = u[i] << (32 - shift);
1383         DEBUG_KNUTH(dbgs() << " " << r[i]);
1384       }
1385     } else {
1386       for (int i = n-1; i >= 0; i--) {
1387         r[i] = u[i];
1388         DEBUG_KNUTH(dbgs() << " " << r[i]);
1389       }
1390     }
1391     DEBUG_KNUTH(dbgs() << '\n');
1392   }
1393   DEBUG_KNUTH(dbgs() << '\n');
1394 }
1395 
1396 void APInt::divide(const WordType *LHS, unsigned lhsWords, const WordType *RHS,
1397                    unsigned rhsWords, WordType *Quotient, WordType *Remainder) {
1398   assert(lhsWords >= rhsWords && "Fractional result");
1399 
1400   // First, compose the values into an array of 32-bit words instead of
1401   // 64-bit words. This is a necessity of both the "short division" algorithm
1402   // and the Knuth "classical algorithm" which requires there to be native
1403   // operations for +, -, and * on an m bit value with an m*2 bit result. We
1404   // can't use 64-bit operands here because we don't have native results of
1405   // 128-bits. Furthermore, casting the 64-bit values to 32-bit values won't
1406   // work on large-endian machines.
1407   unsigned n = rhsWords * 2;
1408   unsigned m = (lhsWords * 2) - n;
1409 
1410   // Allocate space for the temporary values we need either on the stack, if
1411   // it will fit, or on the heap if it won't.
1412   uint32_t SPACE[128];
1413   uint32_t *U = nullptr;
1414   uint32_t *V = nullptr;
1415   uint32_t *Q = nullptr;
1416   uint32_t *R = nullptr;
1417   if ((Remainder?4:3)*n+2*m+1 <= 128) {
1418     U = &SPACE[0];
1419     V = &SPACE[m+n+1];
1420     Q = &SPACE[(m+n+1) + n];
1421     if (Remainder)
1422       R = &SPACE[(m+n+1) + n + (m+n)];
1423   } else {
1424     U = new uint32_t[m + n + 1];
1425     V = new uint32_t[n];
1426     Q = new uint32_t[m+n];
1427     if (Remainder)
1428       R = new uint32_t[n];
1429   }
1430 
1431   // Initialize the dividend
1432   memset(U, 0, (m+n+1)*sizeof(uint32_t));
1433   for (unsigned i = 0; i < lhsWords; ++i) {
1434     uint64_t tmp = LHS[i];
1435     U[i * 2] = Lo_32(tmp);
1436     U[i * 2 + 1] = Hi_32(tmp);
1437   }
1438   U[m+n] = 0; // this extra word is for "spill" in the Knuth algorithm.
1439 
1440   // Initialize the divisor
1441   memset(V, 0, (n)*sizeof(uint32_t));
1442   for (unsigned i = 0; i < rhsWords; ++i) {
1443     uint64_t tmp = RHS[i];
1444     V[i * 2] = Lo_32(tmp);
1445     V[i * 2 + 1] = Hi_32(tmp);
1446   }
1447 
1448   // initialize the quotient and remainder
1449   memset(Q, 0, (m+n) * sizeof(uint32_t));
1450   if (Remainder)
1451     memset(R, 0, n * sizeof(uint32_t));
1452 
1453   // Now, adjust m and n for the Knuth division. n is the number of words in
1454   // the divisor. m is the number of words by which the dividend exceeds the
1455   // divisor (i.e. m+n is the length of the dividend). These sizes must not
1456   // contain any zero words or the Knuth algorithm fails.
1457   for (unsigned i = n; i > 0 && V[i-1] == 0; i--) {
1458     n--;
1459     m++;
1460   }
1461   for (unsigned i = m+n; i > 0 && U[i-1] == 0; i--)
1462     m--;
1463 
1464   // If we're left with only a single word for the divisor, Knuth doesn't work
1465   // so we implement the short division algorithm here. This is much simpler
1466   // and faster because we are certain that we can divide a 64-bit quantity
1467   // by a 32-bit quantity at hardware speed and short division is simply a
1468   // series of such operations. This is just like doing short division but we
1469   // are using base 2^32 instead of base 10.
1470   assert(n != 0 && "Divide by zero?");
1471   if (n == 1) {
1472     uint32_t divisor = V[0];
1473     uint32_t remainder = 0;
1474     for (int i = m; i >= 0; i--) {
1475       uint64_t partial_dividend = Make_64(remainder, U[i]);
1476       if (partial_dividend == 0) {
1477         Q[i] = 0;
1478         remainder = 0;
1479       } else if (partial_dividend < divisor) {
1480         Q[i] = 0;
1481         remainder = Lo_32(partial_dividend);
1482       } else if (partial_dividend == divisor) {
1483         Q[i] = 1;
1484         remainder = 0;
1485       } else {
1486         Q[i] = Lo_32(partial_dividend / divisor);
1487         remainder = Lo_32(partial_dividend - (Q[i] * divisor));
1488       }
1489     }
1490     if (R)
1491       R[0] = remainder;
1492   } else {
1493     // Now we're ready to invoke the Knuth classical divide algorithm. In this
1494     // case n > 1.
1495     KnuthDiv(U, V, Q, R, m, n);
1496   }
1497 
1498   // If the caller wants the quotient
1499   if (Quotient) {
1500     for (unsigned i = 0; i < lhsWords; ++i)
1501       Quotient[i] = Make_64(Q[i*2+1], Q[i*2]);
1502   }
1503 
1504   // If the caller wants the remainder
1505   if (Remainder) {
1506     for (unsigned i = 0; i < rhsWords; ++i)
1507       Remainder[i] = Make_64(R[i*2+1], R[i*2]);
1508   }
1509 
1510   // Clean up the memory we allocated.
1511   if (U != &SPACE[0]) {
1512     delete [] U;
1513     delete [] V;
1514     delete [] Q;
1515     delete [] R;
1516   }
1517 }
1518 
1519 APInt APInt::udiv(const APInt &RHS) const {
1520   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
1521 
1522   // First, deal with the easy case
1523   if (isSingleWord()) {
1524     assert(RHS.U.VAL != 0 && "Divide by zero?");
1525     return APInt(BitWidth, U.VAL / RHS.U.VAL);
1526   }
1527 
1528   // Get some facts about the LHS and RHS number of bits and words
1529   unsigned lhsWords = getNumWords(getActiveBits());
1530   unsigned rhsBits  = RHS.getActiveBits();
1531   unsigned rhsWords = getNumWords(rhsBits);
1532   assert(rhsWords && "Divided by zero???");
1533 
1534   // Deal with some degenerate cases
1535   if (!lhsWords)
1536     // 0 / X ===> 0
1537     return APInt(BitWidth, 0);
1538   if (rhsBits == 1)
1539     // X / 1 ===> X
1540     return *this;
1541   if (lhsWords < rhsWords || this->ult(RHS))
1542     // X / Y ===> 0, iff X < Y
1543     return APInt(BitWidth, 0);
1544   if (*this == RHS)
1545     // X / X ===> 1
1546     return APInt(BitWidth, 1);
1547   if (lhsWords == 1) // rhsWords is 1 if lhsWords is 1.
1548     // All high words are zero, just use native divide
1549     return APInt(BitWidth, this->U.pVal[0] / RHS.U.pVal[0]);
1550 
1551   // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1552   APInt Quotient(BitWidth, 0); // to hold result.
1553   divide(U.pVal, lhsWords, RHS.U.pVal, rhsWords, Quotient.U.pVal, nullptr);
1554   return Quotient;
1555 }
1556 
1557 APInt APInt::udiv(uint64_t RHS) const {
1558   assert(RHS != 0 && "Divide by zero?");
1559 
1560   // First, deal with the easy case
1561   if (isSingleWord())
1562     return APInt(BitWidth, U.VAL / RHS);
1563 
1564   // Get some facts about the LHS words.
1565   unsigned lhsWords = getNumWords(getActiveBits());
1566 
1567   // Deal with some degenerate cases
1568   if (!lhsWords)
1569     // 0 / X ===> 0
1570     return APInt(BitWidth, 0);
1571   if (RHS == 1)
1572     // X / 1 ===> X
1573     return *this;
1574   if (this->ult(RHS))
1575     // X / Y ===> 0, iff X < Y
1576     return APInt(BitWidth, 0);
1577   if (*this == RHS)
1578     // X / X ===> 1
1579     return APInt(BitWidth, 1);
1580   if (lhsWords == 1) // rhsWords is 1 if lhsWords is 1.
1581     // All high words are zero, just use native divide
1582     return APInt(BitWidth, this->U.pVal[0] / RHS);
1583 
1584   // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1585   APInt Quotient(BitWidth, 0); // to hold result.
1586   divide(U.pVal, lhsWords, &RHS, 1, Quotient.U.pVal, nullptr);
1587   return Quotient;
1588 }
1589 
1590 APInt APInt::sdiv(const APInt &RHS) const {
1591   if (isNegative()) {
1592     if (RHS.isNegative())
1593       return (-(*this)).udiv(-RHS);
1594     return -((-(*this)).udiv(RHS));
1595   }
1596   if (RHS.isNegative())
1597     return -(this->udiv(-RHS));
1598   return this->udiv(RHS);
1599 }
1600 
1601 APInt APInt::sdiv(int64_t RHS) const {
1602   if (isNegative()) {
1603     if (RHS < 0)
1604       return (-(*this)).udiv(-RHS);
1605     return -((-(*this)).udiv(RHS));
1606   }
1607   if (RHS < 0)
1608     return -(this->udiv(-RHS));
1609   return this->udiv(RHS);
1610 }
1611 
1612 APInt APInt::urem(const APInt &RHS) const {
1613   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
1614   if (isSingleWord()) {
1615     assert(RHS.U.VAL != 0 && "Remainder by zero?");
1616     return APInt(BitWidth, U.VAL % RHS.U.VAL);
1617   }
1618 
1619   // Get some facts about the LHS
1620   unsigned lhsWords = getNumWords(getActiveBits());
1621 
1622   // Get some facts about the RHS
1623   unsigned rhsBits = RHS.getActiveBits();
1624   unsigned rhsWords = getNumWords(rhsBits);
1625   assert(rhsWords && "Performing remainder operation by zero ???");
1626 
1627   // Check the degenerate cases
1628   if (lhsWords == 0)
1629     // 0 % Y ===> 0
1630     return APInt(BitWidth, 0);
1631   if (rhsBits == 1)
1632     // X % 1 ===> 0
1633     return APInt(BitWidth, 0);
1634   if (lhsWords < rhsWords || this->ult(RHS))
1635     // X % Y ===> X, iff X < Y
1636     return *this;
1637   if (*this == RHS)
1638     // X % X == 0;
1639     return APInt(BitWidth, 0);
1640   if (lhsWords == 1)
1641     // All high words are zero, just use native remainder
1642     return APInt(BitWidth, U.pVal[0] % RHS.U.pVal[0]);
1643 
1644   // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1645   APInt Remainder(BitWidth, 0);
1646   divide(U.pVal, lhsWords, RHS.U.pVal, rhsWords, nullptr, Remainder.U.pVal);
1647   return Remainder;
1648 }
1649 
1650 uint64_t APInt::urem(uint64_t RHS) const {
1651   assert(RHS != 0 && "Remainder by zero?");
1652 
1653   if (isSingleWord())
1654     return U.VAL % RHS;
1655 
1656   // Get some facts about the LHS
1657   unsigned lhsWords = getNumWords(getActiveBits());
1658 
1659   // Check the degenerate cases
1660   if (lhsWords == 0)
1661     // 0 % Y ===> 0
1662     return 0;
1663   if (RHS == 1)
1664     // X % 1 ===> 0
1665     return 0;
1666   if (this->ult(RHS))
1667     // X % Y ===> X, iff X < Y
1668     return getZExtValue();
1669   if (*this == RHS)
1670     // X % X == 0;
1671     return 0;
1672   if (lhsWords == 1)
1673     // All high words are zero, just use native remainder
1674     return U.pVal[0] % RHS;
1675 
1676   // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1677   uint64_t Remainder;
1678   divide(U.pVal, lhsWords, &RHS, 1, nullptr, &Remainder);
1679   return Remainder;
1680 }
1681 
1682 APInt APInt::srem(const APInt &RHS) const {
1683   if (isNegative()) {
1684     if (RHS.isNegative())
1685       return -((-(*this)).urem(-RHS));
1686     return -((-(*this)).urem(RHS));
1687   }
1688   if (RHS.isNegative())
1689     return this->urem(-RHS);
1690   return this->urem(RHS);
1691 }
1692 
1693 int64_t APInt::srem(int64_t RHS) const {
1694   if (isNegative()) {
1695     if (RHS < 0)
1696       return -((-(*this)).urem(-RHS));
1697     return -((-(*this)).urem(RHS));
1698   }
1699   if (RHS < 0)
1700     return this->urem(-RHS);
1701   return this->urem(RHS);
1702 }
1703 
1704 void APInt::udivrem(const APInt &LHS, const APInt &RHS,
1705                     APInt &Quotient, APInt &Remainder) {
1706   assert(LHS.BitWidth == RHS.BitWidth && "Bit widths must be the same");
1707   unsigned BitWidth = LHS.BitWidth;
1708 
1709   // First, deal with the easy case
1710   if (LHS.isSingleWord()) {
1711     assert(RHS.U.VAL != 0 && "Divide by zero?");
1712     uint64_t QuotVal = LHS.U.VAL / RHS.U.VAL;
1713     uint64_t RemVal = LHS.U.VAL % RHS.U.VAL;
1714     Quotient = APInt(BitWidth, QuotVal);
1715     Remainder = APInt(BitWidth, RemVal);
1716     return;
1717   }
1718 
1719   // Get some size facts about the dividend and divisor
1720   unsigned lhsWords = getNumWords(LHS.getActiveBits());
1721   unsigned rhsBits  = RHS.getActiveBits();
1722   unsigned rhsWords = getNumWords(rhsBits);
1723   assert(rhsWords && "Performing divrem operation by zero ???");
1724 
1725   // Check the degenerate cases
1726   if (lhsWords == 0) {
1727     Quotient = APInt(BitWidth, 0);    // 0 / Y ===> 0
1728     Remainder = APInt(BitWidth, 0);   // 0 % Y ===> 0
1729     return;
1730   }
1731 
1732   if (rhsBits == 1) {
1733     Quotient = LHS;                   // X / 1 ===> X
1734     Remainder = APInt(BitWidth, 0);   // X % 1 ===> 0
1735   }
1736 
1737   if (lhsWords < rhsWords || LHS.ult(RHS)) {
1738     Remainder = LHS;                  // X % Y ===> X, iff X < Y
1739     Quotient = APInt(BitWidth, 0);    // X / Y ===> 0, iff X < Y
1740     return;
1741   }
1742 
1743   if (LHS == RHS) {
1744     Quotient  = APInt(BitWidth, 1);   // X / X ===> 1
1745     Remainder = APInt(BitWidth, 0);   // X % X ===> 0;
1746     return;
1747   }
1748 
1749   // Make sure there is enough space to hold the results.
1750   // NOTE: This assumes that reallocate won't affect any bits if it doesn't
1751   // change the size. This is necessary if Quotient or Remainder is aliased
1752   // with LHS or RHS.
1753   Quotient.reallocate(BitWidth);
1754   Remainder.reallocate(BitWidth);
1755 
1756   if (lhsWords == 1) { // rhsWords is 1 if lhsWords is 1.
1757     // There is only one word to consider so use the native versions.
1758     uint64_t lhsValue = LHS.U.pVal[0];
1759     uint64_t rhsValue = RHS.U.pVal[0];
1760     Quotient = lhsValue / rhsValue;
1761     Remainder = lhsValue % rhsValue;
1762     return;
1763   }
1764 
1765   // Okay, lets do it the long way
1766   divide(LHS.U.pVal, lhsWords, RHS.U.pVal, rhsWords, Quotient.U.pVal,
1767          Remainder.U.pVal);
1768   // Clear the rest of the Quotient and Remainder.
1769   std::memset(Quotient.U.pVal + lhsWords, 0,
1770               (getNumWords(BitWidth) - lhsWords) * APINT_WORD_SIZE);
1771   std::memset(Remainder.U.pVal + rhsWords, 0,
1772               (getNumWords(BitWidth) - rhsWords) * APINT_WORD_SIZE);
1773 }
1774 
1775 void APInt::udivrem(const APInt &LHS, uint64_t RHS, APInt &Quotient,
1776                     uint64_t &Remainder) {
1777   assert(RHS != 0 && "Divide by zero?");
1778   unsigned BitWidth = LHS.BitWidth;
1779 
1780   // First, deal with the easy case
1781   if (LHS.isSingleWord()) {
1782     uint64_t QuotVal = LHS.U.VAL / RHS;
1783     Remainder = LHS.U.VAL % RHS;
1784     Quotient = APInt(BitWidth, QuotVal);
1785     return;
1786   }
1787 
1788   // Get some size facts about the dividend and divisor
1789   unsigned lhsWords = getNumWords(LHS.getActiveBits());
1790 
1791   // Check the degenerate cases
1792   if (lhsWords == 0) {
1793     Quotient = APInt(BitWidth, 0);    // 0 / Y ===> 0
1794     Remainder = 0;                    // 0 % Y ===> 0
1795     return;
1796   }
1797 
1798   if (RHS == 1) {
1799     Quotient = LHS;                   // X / 1 ===> X
1800     Remainder = 0;                    // X % 1 ===> 0
1801     return;
1802   }
1803 
1804   if (LHS.ult(RHS)) {
1805     Remainder = LHS.getZExtValue();   // X % Y ===> X, iff X < Y
1806     Quotient = APInt(BitWidth, 0);    // X / Y ===> 0, iff X < Y
1807     return;
1808   }
1809 
1810   if (LHS == RHS) {
1811     Quotient  = APInt(BitWidth, 1);   // X / X ===> 1
1812     Remainder = 0;                    // X % X ===> 0;
1813     return;
1814   }
1815 
1816   // Make sure there is enough space to hold the results.
1817   // NOTE: This assumes that reallocate won't affect any bits if it doesn't
1818   // change the size. This is necessary if Quotient is aliased with LHS.
1819   Quotient.reallocate(BitWidth);
1820 
1821   if (lhsWords == 1) { // rhsWords is 1 if lhsWords is 1.
1822     // There is only one word to consider so use the native versions.
1823     uint64_t lhsValue = LHS.U.pVal[0];
1824     Quotient = lhsValue / RHS;
1825     Remainder = lhsValue % RHS;
1826     return;
1827   }
1828 
1829   // Okay, lets do it the long way
1830   divide(LHS.U.pVal, lhsWords, &RHS, 1, Quotient.U.pVal, &Remainder);
1831   // Clear the rest of the Quotient.
1832   std::memset(Quotient.U.pVal + lhsWords, 0,
1833               (getNumWords(BitWidth) - lhsWords) * APINT_WORD_SIZE);
1834 }
1835 
1836 void APInt::sdivrem(const APInt &LHS, const APInt &RHS,
1837                     APInt &Quotient, APInt &Remainder) {
1838   if (LHS.isNegative()) {
1839     if (RHS.isNegative())
1840       APInt::udivrem(-LHS, -RHS, Quotient, Remainder);
1841     else {
1842       APInt::udivrem(-LHS, RHS, Quotient, Remainder);
1843       Quotient.negate();
1844     }
1845     Remainder.negate();
1846   } else if (RHS.isNegative()) {
1847     APInt::udivrem(LHS, -RHS, Quotient, Remainder);
1848     Quotient.negate();
1849   } else {
1850     APInt::udivrem(LHS, RHS, Quotient, Remainder);
1851   }
1852 }
1853 
1854 void APInt::sdivrem(const APInt &LHS, int64_t RHS,
1855                     APInt &Quotient, int64_t &Remainder) {
1856   uint64_t R = Remainder;
1857   if (LHS.isNegative()) {
1858     if (RHS < 0)
1859       APInt::udivrem(-LHS, -RHS, Quotient, R);
1860     else {
1861       APInt::udivrem(-LHS, RHS, Quotient, R);
1862       Quotient.negate();
1863     }
1864     R = -R;
1865   } else if (RHS < 0) {
1866     APInt::udivrem(LHS, -RHS, Quotient, R);
1867     Quotient.negate();
1868   } else {
1869     APInt::udivrem(LHS, RHS, Quotient, R);
1870   }
1871   Remainder = R;
1872 }
1873 
1874 APInt APInt::sadd_ov(const APInt &RHS, bool &Overflow) const {
1875   APInt Res = *this+RHS;
1876   Overflow = isNonNegative() == RHS.isNonNegative() &&
1877              Res.isNonNegative() != isNonNegative();
1878   return Res;
1879 }
1880 
1881 APInt APInt::uadd_ov(const APInt &RHS, bool &Overflow) const {
1882   APInt Res = *this+RHS;
1883   Overflow = Res.ult(RHS);
1884   return Res;
1885 }
1886 
1887 APInt APInt::ssub_ov(const APInt &RHS, bool &Overflow) const {
1888   APInt Res = *this - RHS;
1889   Overflow = isNonNegative() != RHS.isNonNegative() &&
1890              Res.isNonNegative() != isNonNegative();
1891   return Res;
1892 }
1893 
1894 APInt APInt::usub_ov(const APInt &RHS, bool &Overflow) const {
1895   APInt Res = *this-RHS;
1896   Overflow = Res.ugt(*this);
1897   return Res;
1898 }
1899 
1900 APInt APInt::sdiv_ov(const APInt &RHS, bool &Overflow) const {
1901   // MININT/-1  -->  overflow.
1902   Overflow = isMinSignedValue() && RHS.isAllOnesValue();
1903   return sdiv(RHS);
1904 }
1905 
1906 APInt APInt::smul_ov(const APInt &RHS, bool &Overflow) const {
1907   APInt Res = *this * RHS;
1908 
1909   if (*this != 0 && RHS != 0)
1910     Overflow = Res.sdiv(RHS) != *this || Res.sdiv(*this) != RHS;
1911   else
1912     Overflow = false;
1913   return Res;
1914 }
1915 
1916 APInt APInt::umul_ov(const APInt &RHS, bool &Overflow) const {
1917   if (countLeadingZeros() + RHS.countLeadingZeros() + 2 <= BitWidth) {
1918     Overflow = true;
1919     return *this * RHS;
1920   }
1921 
1922   APInt Res = lshr(1) * RHS;
1923   Overflow = Res.isNegative();
1924   Res <<= 1;
1925   if ((*this)[0]) {
1926     Res += RHS;
1927     if (Res.ult(RHS))
1928       Overflow = true;
1929   }
1930   return Res;
1931 }
1932 
1933 APInt APInt::sshl_ov(const APInt &ShAmt, bool &Overflow) const {
1934   Overflow = ShAmt.uge(getBitWidth());
1935   if (Overflow)
1936     return APInt(BitWidth, 0);
1937 
1938   if (isNonNegative()) // Don't allow sign change.
1939     Overflow = ShAmt.uge(countLeadingZeros());
1940   else
1941     Overflow = ShAmt.uge(countLeadingOnes());
1942 
1943   return *this << ShAmt;
1944 }
1945 
1946 APInt APInt::ushl_ov(const APInt &ShAmt, bool &Overflow) const {
1947   Overflow = ShAmt.uge(getBitWidth());
1948   if (Overflow)
1949     return APInt(BitWidth, 0);
1950 
1951   Overflow = ShAmt.ugt(countLeadingZeros());
1952 
1953   return *this << ShAmt;
1954 }
1955 
1956 APInt APInt::sadd_sat(const APInt &RHS) const {
1957   bool Overflow;
1958   APInt Res = sadd_ov(RHS, Overflow);
1959   if (!Overflow)
1960     return Res;
1961 
1962   return isNegative() ? APInt::getSignedMinValue(BitWidth)
1963                       : APInt::getSignedMaxValue(BitWidth);
1964 }
1965 
1966 APInt APInt::uadd_sat(const APInt &RHS) const {
1967   bool Overflow;
1968   APInt Res = uadd_ov(RHS, Overflow);
1969   if (!Overflow)
1970     return Res;
1971 
1972   return APInt::getMaxValue(BitWidth);
1973 }
1974 
1975 APInt APInt::ssub_sat(const APInt &RHS) const {
1976   bool Overflow;
1977   APInt Res = ssub_ov(RHS, Overflow);
1978   if (!Overflow)
1979     return Res;
1980 
1981   return isNegative() ? APInt::getSignedMinValue(BitWidth)
1982                       : APInt::getSignedMaxValue(BitWidth);
1983 }
1984 
1985 APInt APInt::usub_sat(const APInt &RHS) const {
1986   bool Overflow;
1987   APInt Res = usub_ov(RHS, Overflow);
1988   if (!Overflow)
1989     return Res;
1990 
1991   return APInt(BitWidth, 0);
1992 }
1993 
1994 
1995 void APInt::fromString(unsigned numbits, StringRef str, uint8_t radix) {
1996   // Check our assumptions here
1997   assert(!str.empty() && "Invalid string length");
1998   assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 ||
1999           radix == 36) &&
2000          "Radix should be 2, 8, 10, 16, or 36!");
2001 
2002   StringRef::iterator p = str.begin();
2003   size_t slen = str.size();
2004   bool isNeg = *p == '-';
2005   if (*p == '-' || *p == '+') {
2006     p++;
2007     slen--;
2008     assert(slen && "String is only a sign, needs a value.");
2009   }
2010   assert((slen <= numbits || radix != 2) && "Insufficient bit width");
2011   assert(((slen-1)*3 <= numbits || radix != 8) && "Insufficient bit width");
2012   assert(((slen-1)*4 <= numbits || radix != 16) && "Insufficient bit width");
2013   assert((((slen-1)*64)/22 <= numbits || radix != 10) &&
2014          "Insufficient bit width");
2015 
2016   // Allocate memory if needed
2017   if (isSingleWord())
2018     U.VAL = 0;
2019   else
2020     U.pVal = getClearedMemory(getNumWords());
2021 
2022   // Figure out if we can shift instead of multiply
2023   unsigned shift = (radix == 16 ? 4 : radix == 8 ? 3 : radix == 2 ? 1 : 0);
2024 
2025   // Enter digit traversal loop
2026   for (StringRef::iterator e = str.end(); p != e; ++p) {
2027     unsigned digit = getDigit(*p, radix);
2028     assert(digit < radix && "Invalid character in digit string");
2029 
2030     // Shift or multiply the value by the radix
2031     if (slen > 1) {
2032       if (shift)
2033         *this <<= shift;
2034       else
2035         *this *= radix;
2036     }
2037 
2038     // Add in the digit we just interpreted
2039     *this += digit;
2040   }
2041   // If its negative, put it in two's complement form
2042   if (isNeg)
2043     this->negate();
2044 }
2045 
2046 void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix,
2047                      bool Signed, bool formatAsCLiteral) const {
2048   assert((Radix == 10 || Radix == 8 || Radix == 16 || Radix == 2 ||
2049           Radix == 36) &&
2050          "Radix should be 2, 8, 10, 16, or 36!");
2051 
2052   const char *Prefix = "";
2053   if (formatAsCLiteral) {
2054     switch (Radix) {
2055       case 2:
2056         // Binary literals are a non-standard extension added in gcc 4.3:
2057         // http://gcc.gnu.org/onlinedocs/gcc-4.3.0/gcc/Binary-constants.html
2058         Prefix = "0b";
2059         break;
2060       case 8:
2061         Prefix = "0";
2062         break;
2063       case 10:
2064         break; // No prefix
2065       case 16:
2066         Prefix = "0x";
2067         break;
2068       default:
2069         llvm_unreachable("Invalid radix!");
2070     }
2071   }
2072 
2073   // First, check for a zero value and just short circuit the logic below.
2074   if (*this == 0) {
2075     while (*Prefix) {
2076       Str.push_back(*Prefix);
2077       ++Prefix;
2078     };
2079     Str.push_back('0');
2080     return;
2081   }
2082 
2083   static const char Digits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
2084 
2085   if (isSingleWord()) {
2086     char Buffer[65];
2087     char *BufPtr = std::end(Buffer);
2088 
2089     uint64_t N;
2090     if (!Signed) {
2091       N = getZExtValue();
2092     } else {
2093       int64_t I = getSExtValue();
2094       if (I >= 0) {
2095         N = I;
2096       } else {
2097         Str.push_back('-');
2098         N = -(uint64_t)I;
2099       }
2100     }
2101 
2102     while (*Prefix) {
2103       Str.push_back(*Prefix);
2104       ++Prefix;
2105     };
2106 
2107     while (N) {
2108       *--BufPtr = Digits[N % Radix];
2109       N /= Radix;
2110     }
2111     Str.append(BufPtr, std::end(Buffer));
2112     return;
2113   }
2114 
2115   APInt Tmp(*this);
2116 
2117   if (Signed && isNegative()) {
2118     // They want to print the signed version and it is a negative value
2119     // Flip the bits and add one to turn it into the equivalent positive
2120     // value and put a '-' in the result.
2121     Tmp.negate();
2122     Str.push_back('-');
2123   }
2124 
2125   while (*Prefix) {
2126     Str.push_back(*Prefix);
2127     ++Prefix;
2128   };
2129 
2130   // We insert the digits backward, then reverse them to get the right order.
2131   unsigned StartDig = Str.size();
2132 
2133   // For the 2, 8 and 16 bit cases, we can just shift instead of divide
2134   // because the number of bits per digit (1, 3 and 4 respectively) divides
2135   // equally.  We just shift until the value is zero.
2136   if (Radix == 2 || Radix == 8 || Radix == 16) {
2137     // Just shift tmp right for each digit width until it becomes zero
2138     unsigned ShiftAmt = (Radix == 16 ? 4 : (Radix == 8 ? 3 : 1));
2139     unsigned MaskAmt = Radix - 1;
2140 
2141     while (Tmp.getBoolValue()) {
2142       unsigned Digit = unsigned(Tmp.getRawData()[0]) & MaskAmt;
2143       Str.push_back(Digits[Digit]);
2144       Tmp.lshrInPlace(ShiftAmt);
2145     }
2146   } else {
2147     while (Tmp.getBoolValue()) {
2148       uint64_t Digit;
2149       udivrem(Tmp, Radix, Tmp, Digit);
2150       assert(Digit < Radix && "divide failed");
2151       Str.push_back(Digits[Digit]);
2152     }
2153   }
2154 
2155   // Reverse the digits before returning.
2156   std::reverse(Str.begin()+StartDig, Str.end());
2157 }
2158 
2159 /// Returns the APInt as a std::string. Note that this is an inefficient method.
2160 /// It is better to pass in a SmallVector/SmallString to the methods above.
2161 std::string APInt::toString(unsigned Radix = 10, bool Signed = true) const {
2162   SmallString<40> S;
2163   toString(S, Radix, Signed, /* formatAsCLiteral = */false);
2164   return S.str();
2165 }
2166 
2167 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2168 LLVM_DUMP_METHOD void APInt::dump() const {
2169   SmallString<40> S, U;
2170   this->toStringUnsigned(U);
2171   this->toStringSigned(S);
2172   dbgs() << "APInt(" << BitWidth << "b, "
2173          << U << "u " << S << "s)\n";
2174 }
2175 #endif
2176 
2177 void APInt::print(raw_ostream &OS, bool isSigned) const {
2178   SmallString<40> S;
2179   this->toString(S, 10, isSigned, /* formatAsCLiteral = */false);
2180   OS << S;
2181 }
2182 
2183 // This implements a variety of operations on a representation of
2184 // arbitrary precision, two's-complement, bignum integer values.
2185 
2186 // Assumed by lowHalf, highHalf, partMSB and partLSB.  A fairly safe
2187 // and unrestricting assumption.
2188 static_assert(APInt::APINT_BITS_PER_WORD % 2 == 0,
2189               "Part width must be divisible by 2!");
2190 
2191 /* Some handy functions local to this file.  */
2192 
2193 /* Returns the integer part with the least significant BITS set.
2194    BITS cannot be zero.  */
2195 static inline APInt::WordType lowBitMask(unsigned bits) {
2196   assert(bits != 0 && bits <= APInt::APINT_BITS_PER_WORD);
2197 
2198   return ~(APInt::WordType) 0 >> (APInt::APINT_BITS_PER_WORD - bits);
2199 }
2200 
2201 /* Returns the value of the lower half of PART.  */
2202 static inline APInt::WordType lowHalf(APInt::WordType part) {
2203   return part & lowBitMask(APInt::APINT_BITS_PER_WORD / 2);
2204 }
2205 
2206 /* Returns the value of the upper half of PART.  */
2207 static inline APInt::WordType highHalf(APInt::WordType part) {
2208   return part >> (APInt::APINT_BITS_PER_WORD / 2);
2209 }
2210 
2211 /* Returns the bit number of the most significant set bit of a part.
2212    If the input number has no bits set -1U is returned.  */
2213 static unsigned partMSB(APInt::WordType value) {
2214   return findLastSet(value, ZB_Max);
2215 }
2216 
2217 /* Returns the bit number of the least significant set bit of a
2218    part.  If the input number has no bits set -1U is returned.  */
2219 static unsigned partLSB(APInt::WordType value) {
2220   return findFirstSet(value, ZB_Max);
2221 }
2222 
2223 /* Sets the least significant part of a bignum to the input value, and
2224    zeroes out higher parts.  */
2225 void APInt::tcSet(WordType *dst, WordType part, unsigned parts) {
2226   assert(parts > 0);
2227 
2228   dst[0] = part;
2229   for (unsigned i = 1; i < parts; i++)
2230     dst[i] = 0;
2231 }
2232 
2233 /* Assign one bignum to another.  */
2234 void APInt::tcAssign(WordType *dst, const WordType *src, unsigned parts) {
2235   for (unsigned i = 0; i < parts; i++)
2236     dst[i] = src[i];
2237 }
2238 
2239 /* Returns true if a bignum is zero, false otherwise.  */
2240 bool APInt::tcIsZero(const WordType *src, unsigned parts) {
2241   for (unsigned i = 0; i < parts; i++)
2242     if (src[i])
2243       return false;
2244 
2245   return true;
2246 }
2247 
2248 /* Extract the given bit of a bignum; returns 0 or 1.  */
2249 int APInt::tcExtractBit(const WordType *parts, unsigned bit) {
2250   return (parts[whichWord(bit)] & maskBit(bit)) != 0;
2251 }
2252 
2253 /* Set the given bit of a bignum. */
2254 void APInt::tcSetBit(WordType *parts, unsigned bit) {
2255   parts[whichWord(bit)] |= maskBit(bit);
2256 }
2257 
2258 /* Clears the given bit of a bignum. */
2259 void APInt::tcClearBit(WordType *parts, unsigned bit) {
2260   parts[whichWord(bit)] &= ~maskBit(bit);
2261 }
2262 
2263 /* Returns the bit number of the least significant set bit of a
2264    number.  If the input number has no bits set -1U is returned.  */
2265 unsigned APInt::tcLSB(const WordType *parts, unsigned n) {
2266   for (unsigned i = 0; i < n; i++) {
2267     if (parts[i] != 0) {
2268       unsigned lsb = partLSB(parts[i]);
2269 
2270       return lsb + i * APINT_BITS_PER_WORD;
2271     }
2272   }
2273 
2274   return -1U;
2275 }
2276 
2277 /* Returns the bit number of the most significant set bit of a number.
2278    If the input number has no bits set -1U is returned.  */
2279 unsigned APInt::tcMSB(const WordType *parts, unsigned n) {
2280   do {
2281     --n;
2282 
2283     if (parts[n] != 0) {
2284       unsigned msb = partMSB(parts[n]);
2285 
2286       return msb + n * APINT_BITS_PER_WORD;
2287     }
2288   } while (n);
2289 
2290   return -1U;
2291 }
2292 
2293 /* Copy the bit vector of width srcBITS from SRC, starting at bit
2294    srcLSB, to DST, of dstCOUNT parts, such that the bit srcLSB becomes
2295    the least significant bit of DST.  All high bits above srcBITS in
2296    DST are zero-filled.  */
2297 void
2298 APInt::tcExtract(WordType *dst, unsigned dstCount, const WordType *src,
2299                  unsigned srcBits, unsigned srcLSB) {
2300   unsigned dstParts = (srcBits + APINT_BITS_PER_WORD - 1) / APINT_BITS_PER_WORD;
2301   assert(dstParts <= dstCount);
2302 
2303   unsigned firstSrcPart = srcLSB / APINT_BITS_PER_WORD;
2304   tcAssign (dst, src + firstSrcPart, dstParts);
2305 
2306   unsigned shift = srcLSB % APINT_BITS_PER_WORD;
2307   tcShiftRight (dst, dstParts, shift);
2308 
2309   /* We now have (dstParts * APINT_BITS_PER_WORD - shift) bits from SRC
2310      in DST.  If this is less that srcBits, append the rest, else
2311      clear the high bits.  */
2312   unsigned n = dstParts * APINT_BITS_PER_WORD - shift;
2313   if (n < srcBits) {
2314     WordType mask = lowBitMask (srcBits - n);
2315     dst[dstParts - 1] |= ((src[firstSrcPart + dstParts] & mask)
2316                           << n % APINT_BITS_PER_WORD);
2317   } else if (n > srcBits) {
2318     if (srcBits % APINT_BITS_PER_WORD)
2319       dst[dstParts - 1] &= lowBitMask (srcBits % APINT_BITS_PER_WORD);
2320   }
2321 
2322   /* Clear high parts.  */
2323   while (dstParts < dstCount)
2324     dst[dstParts++] = 0;
2325 }
2326 
2327 /* DST += RHS + C where C is zero or one.  Returns the carry flag.  */
2328 APInt::WordType APInt::tcAdd(WordType *dst, const WordType *rhs,
2329                              WordType c, unsigned parts) {
2330   assert(c <= 1);
2331 
2332   for (unsigned i = 0; i < parts; i++) {
2333     WordType l = dst[i];
2334     if (c) {
2335       dst[i] += rhs[i] + 1;
2336       c = (dst[i] <= l);
2337     } else {
2338       dst[i] += rhs[i];
2339       c = (dst[i] < l);
2340     }
2341   }
2342 
2343   return c;
2344 }
2345 
2346 /// This function adds a single "word" integer, src, to the multiple
2347 /// "word" integer array, dst[]. dst[] is modified to reflect the addition and
2348 /// 1 is returned if there is a carry out, otherwise 0 is returned.
2349 /// @returns the carry of the addition.
2350 APInt::WordType APInt::tcAddPart(WordType *dst, WordType src,
2351                                  unsigned parts) {
2352   for (unsigned i = 0; i < parts; ++i) {
2353     dst[i] += src;
2354     if (dst[i] >= src)
2355       return 0; // No need to carry so exit early.
2356     src = 1; // Carry one to next digit.
2357   }
2358 
2359   return 1;
2360 }
2361 
2362 /* DST -= RHS + C where C is zero or one.  Returns the carry flag.  */
2363 APInt::WordType APInt::tcSubtract(WordType *dst, const WordType *rhs,
2364                                   WordType c, unsigned parts) {
2365   assert(c <= 1);
2366 
2367   for (unsigned i = 0; i < parts; i++) {
2368     WordType l = dst[i];
2369     if (c) {
2370       dst[i] -= rhs[i] + 1;
2371       c = (dst[i] >= l);
2372     } else {
2373       dst[i] -= rhs[i];
2374       c = (dst[i] > l);
2375     }
2376   }
2377 
2378   return c;
2379 }
2380 
2381 /// This function subtracts a single "word" (64-bit word), src, from
2382 /// the multi-word integer array, dst[], propagating the borrowed 1 value until
2383 /// no further borrowing is needed or it runs out of "words" in dst.  The result
2384 /// is 1 if "borrowing" exhausted the digits in dst, or 0 if dst was not
2385 /// exhausted. In other words, if src > dst then this function returns 1,
2386 /// otherwise 0.
2387 /// @returns the borrow out of the subtraction
2388 APInt::WordType APInt::tcSubtractPart(WordType *dst, WordType src,
2389                                       unsigned parts) {
2390   for (unsigned i = 0; i < parts; ++i) {
2391     WordType Dst = dst[i];
2392     dst[i] -= src;
2393     if (src <= Dst)
2394       return 0; // No need to borrow so exit early.
2395     src = 1; // We have to "borrow 1" from next "word"
2396   }
2397 
2398   return 1;
2399 }
2400 
2401 /* Negate a bignum in-place.  */
2402 void APInt::tcNegate(WordType *dst, unsigned parts) {
2403   tcComplement(dst, parts);
2404   tcIncrement(dst, parts);
2405 }
2406 
2407 /*  DST += SRC * MULTIPLIER + CARRY   if add is true
2408     DST  = SRC * MULTIPLIER + CARRY   if add is false
2409 
2410     Requires 0 <= DSTPARTS <= SRCPARTS + 1.  If DST overlaps SRC
2411     they must start at the same point, i.e. DST == SRC.
2412 
2413     If DSTPARTS == SRCPARTS + 1 no overflow occurs and zero is
2414     returned.  Otherwise DST is filled with the least significant
2415     DSTPARTS parts of the result, and if all of the omitted higher
2416     parts were zero return zero, otherwise overflow occurred and
2417     return one.  */
2418 int APInt::tcMultiplyPart(WordType *dst, const WordType *src,
2419                           WordType multiplier, WordType carry,
2420                           unsigned srcParts, unsigned dstParts,
2421                           bool add) {
2422   /* Otherwise our writes of DST kill our later reads of SRC.  */
2423   assert(dst <= src || dst >= src + srcParts);
2424   assert(dstParts <= srcParts + 1);
2425 
2426   /* N loops; minimum of dstParts and srcParts.  */
2427   unsigned n = std::min(dstParts, srcParts);
2428 
2429   for (unsigned i = 0; i < n; i++) {
2430     WordType low, mid, high, srcPart;
2431 
2432       /* [ LOW, HIGH ] = MULTIPLIER * SRC[i] + DST[i] + CARRY.
2433 
2434          This cannot overflow, because
2435 
2436          (n - 1) * (n - 1) + 2 (n - 1) = (n - 1) * (n + 1)
2437 
2438          which is less than n^2.  */
2439 
2440     srcPart = src[i];
2441 
2442     if (multiplier == 0 || srcPart == 0) {
2443       low = carry;
2444       high = 0;
2445     } else {
2446       low = lowHalf(srcPart) * lowHalf(multiplier);
2447       high = highHalf(srcPart) * highHalf(multiplier);
2448 
2449       mid = lowHalf(srcPart) * highHalf(multiplier);
2450       high += highHalf(mid);
2451       mid <<= APINT_BITS_PER_WORD / 2;
2452       if (low + mid < low)
2453         high++;
2454       low += mid;
2455 
2456       mid = highHalf(srcPart) * lowHalf(multiplier);
2457       high += highHalf(mid);
2458       mid <<= APINT_BITS_PER_WORD / 2;
2459       if (low + mid < low)
2460         high++;
2461       low += mid;
2462 
2463       /* Now add carry.  */
2464       if (low + carry < low)
2465         high++;
2466       low += carry;
2467     }
2468 
2469     if (add) {
2470       /* And now DST[i], and store the new low part there.  */
2471       if (low + dst[i] < low)
2472         high++;
2473       dst[i] += low;
2474     } else
2475       dst[i] = low;
2476 
2477     carry = high;
2478   }
2479 
2480   if (srcParts < dstParts) {
2481     /* Full multiplication, there is no overflow.  */
2482     assert(srcParts + 1 == dstParts);
2483     dst[srcParts] = carry;
2484     return 0;
2485   }
2486 
2487   /* We overflowed if there is carry.  */
2488   if (carry)
2489     return 1;
2490 
2491   /* We would overflow if any significant unwritten parts would be
2492      non-zero.  This is true if any remaining src parts are non-zero
2493      and the multiplier is non-zero.  */
2494   if (multiplier)
2495     for (unsigned i = dstParts; i < srcParts; i++)
2496       if (src[i])
2497         return 1;
2498 
2499   /* We fitted in the narrow destination.  */
2500   return 0;
2501 }
2502 
2503 /* DST = LHS * RHS, where DST has the same width as the operands and
2504    is filled with the least significant parts of the result.  Returns
2505    one if overflow occurred, otherwise zero.  DST must be disjoint
2506    from both operands.  */
2507 int APInt::tcMultiply(WordType *dst, const WordType *lhs,
2508                       const WordType *rhs, unsigned parts) {
2509   assert(dst != lhs && dst != rhs);
2510 
2511   int overflow = 0;
2512   tcSet(dst, 0, parts);
2513 
2514   for (unsigned i = 0; i < parts; i++)
2515     overflow |= tcMultiplyPart(&dst[i], lhs, rhs[i], 0, parts,
2516                                parts - i, true);
2517 
2518   return overflow;
2519 }
2520 
2521 /// DST = LHS * RHS, where DST has width the sum of the widths of the
2522 /// operands. No overflow occurs. DST must be disjoint from both operands.
2523 void APInt::tcFullMultiply(WordType *dst, const WordType *lhs,
2524                            const WordType *rhs, unsigned lhsParts,
2525                            unsigned rhsParts) {
2526   /* Put the narrower number on the LHS for less loops below.  */
2527   if (lhsParts > rhsParts)
2528     return tcFullMultiply (dst, rhs, lhs, rhsParts, lhsParts);
2529 
2530   assert(dst != lhs && dst != rhs);
2531 
2532   tcSet(dst, 0, rhsParts);
2533 
2534   for (unsigned i = 0; i < lhsParts; i++)
2535     tcMultiplyPart(&dst[i], rhs, lhs[i], 0, rhsParts, rhsParts + 1, true);
2536 }
2537 
2538 /* If RHS is zero LHS and REMAINDER are left unchanged, return one.
2539    Otherwise set LHS to LHS / RHS with the fractional part discarded,
2540    set REMAINDER to the remainder, return zero.  i.e.
2541 
2542    OLD_LHS = RHS * LHS + REMAINDER
2543 
2544    SCRATCH is a bignum of the same size as the operands and result for
2545    use by the routine; its contents need not be initialized and are
2546    destroyed.  LHS, REMAINDER and SCRATCH must be distinct.
2547 */
2548 int APInt::tcDivide(WordType *lhs, const WordType *rhs,
2549                     WordType *remainder, WordType *srhs,
2550                     unsigned parts) {
2551   assert(lhs != remainder && lhs != srhs && remainder != srhs);
2552 
2553   unsigned shiftCount = tcMSB(rhs, parts) + 1;
2554   if (shiftCount == 0)
2555     return true;
2556 
2557   shiftCount = parts * APINT_BITS_PER_WORD - shiftCount;
2558   unsigned n = shiftCount / APINT_BITS_PER_WORD;
2559   WordType mask = (WordType) 1 << (shiftCount % APINT_BITS_PER_WORD);
2560 
2561   tcAssign(srhs, rhs, parts);
2562   tcShiftLeft(srhs, parts, shiftCount);
2563   tcAssign(remainder, lhs, parts);
2564   tcSet(lhs, 0, parts);
2565 
2566   /* Loop, subtracting SRHS if REMAINDER is greater and adding that to
2567      the total.  */
2568   for (;;) {
2569     int compare = tcCompare(remainder, srhs, parts);
2570     if (compare >= 0) {
2571       tcSubtract(remainder, srhs, 0, parts);
2572       lhs[n] |= mask;
2573     }
2574 
2575     if (shiftCount == 0)
2576       break;
2577     shiftCount--;
2578     tcShiftRight(srhs, parts, 1);
2579     if ((mask >>= 1) == 0) {
2580       mask = (WordType) 1 << (APINT_BITS_PER_WORD - 1);
2581       n--;
2582     }
2583   }
2584 
2585   return false;
2586 }
2587 
2588 /// Shift a bignum left Cound bits in-place. Shifted in bits are zero. There are
2589 /// no restrictions on Count.
2590 void APInt::tcShiftLeft(WordType *Dst, unsigned Words, unsigned Count) {
2591   // Don't bother performing a no-op shift.
2592   if (!Count)
2593     return;
2594 
2595   // WordShift is the inter-part shift; BitShift is the intra-part shift.
2596   unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words);
2597   unsigned BitShift = Count % APINT_BITS_PER_WORD;
2598 
2599   // Fastpath for moving by whole words.
2600   if (BitShift == 0) {
2601     std::memmove(Dst + WordShift, Dst, (Words - WordShift) * APINT_WORD_SIZE);
2602   } else {
2603     while (Words-- > WordShift) {
2604       Dst[Words] = Dst[Words - WordShift] << BitShift;
2605       if (Words > WordShift)
2606         Dst[Words] |=
2607           Dst[Words - WordShift - 1] >> (APINT_BITS_PER_WORD - BitShift);
2608     }
2609   }
2610 
2611   // Fill in the remainder with 0s.
2612   std::memset(Dst, 0, WordShift * APINT_WORD_SIZE);
2613 }
2614 
2615 /// Shift a bignum right Count bits in-place. Shifted in bits are zero. There
2616 /// are no restrictions on Count.
2617 void APInt::tcShiftRight(WordType *Dst, unsigned Words, unsigned Count) {
2618   // Don't bother performing a no-op shift.
2619   if (!Count)
2620     return;
2621 
2622   // WordShift is the inter-part shift; BitShift is the intra-part shift.
2623   unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words);
2624   unsigned BitShift = Count % APINT_BITS_PER_WORD;
2625 
2626   unsigned WordsToMove = Words - WordShift;
2627   // Fastpath for moving by whole words.
2628   if (BitShift == 0) {
2629     std::memmove(Dst, Dst + WordShift, WordsToMove * APINT_WORD_SIZE);
2630   } else {
2631     for (unsigned i = 0; i != WordsToMove; ++i) {
2632       Dst[i] = Dst[i + WordShift] >> BitShift;
2633       if (i + 1 != WordsToMove)
2634         Dst[i] |= Dst[i + WordShift + 1] << (APINT_BITS_PER_WORD - BitShift);
2635     }
2636   }
2637 
2638   // Fill in the remainder with 0s.
2639   std::memset(Dst + WordsToMove, 0, WordShift * APINT_WORD_SIZE);
2640 }
2641 
2642 /* Bitwise and of two bignums.  */
2643 void APInt::tcAnd(WordType *dst, const WordType *rhs, unsigned parts) {
2644   for (unsigned i = 0; i < parts; i++)
2645     dst[i] &= rhs[i];
2646 }
2647 
2648 /* Bitwise inclusive or of two bignums.  */
2649 void APInt::tcOr(WordType *dst, const WordType *rhs, unsigned parts) {
2650   for (unsigned i = 0; i < parts; i++)
2651     dst[i] |= rhs[i];
2652 }
2653 
2654 /* Bitwise exclusive or of two bignums.  */
2655 void APInt::tcXor(WordType *dst, const WordType *rhs, unsigned parts) {
2656   for (unsigned i = 0; i < parts; i++)
2657     dst[i] ^= rhs[i];
2658 }
2659 
2660 /* Complement a bignum in-place.  */
2661 void APInt::tcComplement(WordType *dst, unsigned parts) {
2662   for (unsigned i = 0; i < parts; i++)
2663     dst[i] = ~dst[i];
2664 }
2665 
2666 /* Comparison (unsigned) of two bignums.  */
2667 int APInt::tcCompare(const WordType *lhs, const WordType *rhs,
2668                      unsigned parts) {
2669   while (parts) {
2670     parts--;
2671     if (lhs[parts] != rhs[parts])
2672       return (lhs[parts] > rhs[parts]) ? 1 : -1;
2673   }
2674 
2675   return 0;
2676 }
2677 
2678 /* Set the least significant BITS bits of a bignum, clear the
2679    rest.  */
2680 void APInt::tcSetLeastSignificantBits(WordType *dst, unsigned parts,
2681                                       unsigned bits) {
2682   unsigned i = 0;
2683   while (bits > APINT_BITS_PER_WORD) {
2684     dst[i++] = ~(WordType) 0;
2685     bits -= APINT_BITS_PER_WORD;
2686   }
2687 
2688   if (bits)
2689     dst[i++] = ~(WordType) 0 >> (APINT_BITS_PER_WORD - bits);
2690 
2691   while (i < parts)
2692     dst[i++] = 0;
2693 }
2694 
2695 APInt llvm::APIntOps::RoundingUDiv(const APInt &A, const APInt &B,
2696                                    APInt::Rounding RM) {
2697   // Currently udivrem always rounds down.
2698   switch (RM) {
2699   case APInt::Rounding::DOWN:
2700   case APInt::Rounding::TOWARD_ZERO:
2701     return A.udiv(B);
2702   case APInt::Rounding::UP: {
2703     APInt Quo, Rem;
2704     APInt::udivrem(A, B, Quo, Rem);
2705     if (Rem == 0)
2706       return Quo;
2707     return Quo + 1;
2708   }
2709   }
2710   llvm_unreachable("Unknown APInt::Rounding enum");
2711 }
2712 
2713 APInt llvm::APIntOps::RoundingSDiv(const APInt &A, const APInt &B,
2714                                    APInt::Rounding RM) {
2715   switch (RM) {
2716   case APInt::Rounding::DOWN:
2717   case APInt::Rounding::UP: {
2718     APInt Quo, Rem;
2719     APInt::sdivrem(A, B, Quo, Rem);
2720     if (Rem == 0)
2721       return Quo;
2722     // This algorithm deals with arbitrary rounding mode used by sdivrem.
2723     // We want to check whether the non-integer part of the mathematical value
2724     // is negative or not. If the non-integer part is negative, we need to round
2725     // down from Quo; otherwise, if it's positive or 0, we return Quo, as it's
2726     // already rounded down.
2727     if (RM == APInt::Rounding::DOWN) {
2728       if (Rem.isNegative() != B.isNegative())
2729         return Quo - 1;
2730       return Quo;
2731     }
2732     if (Rem.isNegative() != B.isNegative())
2733       return Quo;
2734     return Quo + 1;
2735   }
2736   // Currently sdiv rounds twards zero.
2737   case APInt::Rounding::TOWARD_ZERO:
2738     return A.sdiv(B);
2739   }
2740   llvm_unreachable("Unknown APInt::Rounding enum");
2741 }
2742 
2743 Optional<APInt>
2744 llvm::APIntOps::SolveQuadraticEquationWrap(APInt A, APInt B, APInt C,
2745                                            unsigned RangeWidth) {
2746   unsigned CoeffWidth = A.getBitWidth();
2747   assert(CoeffWidth == B.getBitWidth() && CoeffWidth == C.getBitWidth());
2748   assert(RangeWidth <= CoeffWidth &&
2749          "Value range width should be less than coefficient width");
2750   assert(RangeWidth > 1 && "Value range bit width should be > 1");
2751 
2752   LLVM_DEBUG(dbgs() << __func__ << ": solving " << A << "x^2 + " << B
2753                     << "x + " << C << ", rw:" << RangeWidth << '\n');
2754 
2755   // Identify 0 as a (non)solution immediately.
2756   if (C.sextOrTrunc(RangeWidth).isNullValue() ) {
2757     LLVM_DEBUG(dbgs() << __func__ << ": zero solution\n");
2758     return APInt(CoeffWidth, 0);
2759   }
2760 
2761   // The result of APInt arithmetic has the same bit width as the operands,
2762   // so it can actually lose high bits. A product of two n-bit integers needs
2763   // 2n-1 bits to represent the full value.
2764   // The operation done below (on quadratic coefficients) that can produce
2765   // the largest value is the evaluation of the equation during bisection,
2766   // which needs 3 times the bitwidth of the coefficient, so the total number
2767   // of required bits is 3n.
2768   //
2769   // The purpose of this extension is to simulate the set Z of all integers,
2770   // where n+1 > n for all n in Z. In Z it makes sense to talk about positive
2771   // and negative numbers (not so much in a modulo arithmetic). The method
2772   // used to solve the equation is based on the standard formula for real
2773   // numbers, and uses the concepts of "positive" and "negative" with their
2774   // usual meanings.
2775   CoeffWidth *= 3;
2776   A = A.sext(CoeffWidth);
2777   B = B.sext(CoeffWidth);
2778   C = C.sext(CoeffWidth);
2779 
2780   // Make A > 0 for simplicity. Negate cannot overflow at this point because
2781   // the bit width has increased.
2782   if (A.isNegative()) {
2783     A.negate();
2784     B.negate();
2785     C.negate();
2786   }
2787 
2788   // Solving an equation q(x) = 0 with coefficients in modular arithmetic
2789   // is really solving a set of equations q(x) = kR for k = 0, 1, 2, ...,
2790   // and R = 2^BitWidth.
2791   // Since we're trying not only to find exact solutions, but also values
2792   // that "wrap around", such a set will always have a solution, i.e. an x
2793   // that satisfies at least one of the equations, or such that |q(x)|
2794   // exceeds kR, while |q(x-1)| for the same k does not.
2795   //
2796   // We need to find a value k, such that Ax^2 + Bx + C = kR will have a
2797   // positive solution n (in the above sense), and also such that the n
2798   // will be the least among all solutions corresponding to k = 0, 1, ...
2799   // (more precisely, the least element in the set
2800   //   { n(k) | k is such that a solution n(k) exists }).
2801   //
2802   // Consider the parabola (over real numbers) that corresponds to the
2803   // quadratic equation. Since A > 0, the arms of the parabola will point
2804   // up. Picking different values of k will shift it up and down by R.
2805   //
2806   // We want to shift the parabola in such a way as to reduce the problem
2807   // of solving q(x) = kR to solving shifted_q(x) = 0.
2808   // (The interesting solutions are the ceilings of the real number
2809   // solutions.)
2810   APInt R = APInt::getOneBitSet(CoeffWidth, RangeWidth);
2811   APInt TwoA = 2 * A;
2812   APInt SqrB = B * B;
2813   bool PickLow;
2814 
2815   auto RoundUp = [] (const APInt &V, const APInt &A) -> APInt {
2816     assert(A.isStrictlyPositive());
2817     APInt T = V.abs().urem(A);
2818     if (T.isNullValue())
2819       return V;
2820     return V.isNegative() ? V+T : V+(A-T);
2821   };
2822 
2823   // The vertex of the parabola is at -B/2A, but since A > 0, it's negative
2824   // iff B is positive.
2825   if (B.isNonNegative()) {
2826     // If B >= 0, the vertex it at a negative location (or at 0), so in
2827     // order to have a non-negative solution we need to pick k that makes
2828     // C-kR negative. To satisfy all the requirements for the solution
2829     // that we are looking for, it needs to be closest to 0 of all k.
2830     C = C.srem(R);
2831     if (C.isStrictlyPositive())
2832       C -= R;
2833     // Pick the greater solution.
2834     PickLow = false;
2835   } else {
2836     // If B < 0, the vertex is at a positive location. For any solution
2837     // to exist, the discriminant must be non-negative. This means that
2838     // C-kR <= B^2/4A is a necessary condition for k, i.e. there is a
2839     // lower bound on values of k: kR >= C - B^2/4A.
2840     APInt LowkR = C - SqrB.udiv(2*TwoA); // udiv because all values > 0.
2841     // Round LowkR up (towards +inf) to the nearest kR.
2842     LowkR = RoundUp(LowkR, R);
2843 
2844     // If there exists k meeting the condition above, and such that
2845     // C-kR > 0, there will be two positive real number solutions of
2846     // q(x) = kR. Out of all such values of k, pick the one that makes
2847     // C-kR closest to 0, (i.e. pick maximum k such that C-kR > 0).
2848     // In other words, find maximum k such that LowkR <= kR < C.
2849     if (C.sgt(LowkR)) {
2850       // If LowkR < C, then such a k is guaranteed to exist because
2851       // LowkR itself is a multiple of R.
2852       C -= -RoundUp(-C, R);      // C = C - RoundDown(C, R)
2853       // Pick the smaller solution.
2854       PickLow = true;
2855     } else {
2856       // If C-kR < 0 for all potential k's, it means that one solution
2857       // will be negative, while the other will be positive. The positive
2858       // solution will shift towards 0 if the parabola is moved up.
2859       // Pick the kR closest to the lower bound (i.e. make C-kR closest
2860       // to 0, or in other words, out of all parabolas that have solutions,
2861       // pick the one that is the farthest "up").
2862       // Since LowkR is itself a multiple of R, simply take C-LowkR.
2863       C -= LowkR;
2864       // Pick the greater solution.
2865       PickLow = false;
2866     }
2867   }
2868 
2869   LLVM_DEBUG(dbgs() << __func__ << ": updated coefficients " << A << "x^2 + "
2870                     << B << "x + " << C << ", rw:" << RangeWidth << '\n');
2871 
2872   APInt D = SqrB - 4*A*C;
2873   assert(D.isNonNegative() && "Negative discriminant");
2874   APInt SQ = D.sqrt();
2875 
2876   APInt Q = SQ * SQ;
2877   bool InexactSQ = Q != D;
2878   // The calculated SQ may actually be greater than the exact (non-integer)
2879   // value. If that's the case, decremement SQ to get a value that is lower.
2880   if (Q.sgt(D))
2881     SQ -= 1;
2882 
2883   APInt X;
2884   APInt Rem;
2885 
2886   // SQ is rounded down (i.e SQ * SQ <= D), so the roots may be inexact.
2887   // When using the quadratic formula directly, the calculated low root
2888   // may be greater than the exact one, since we would be subtracting SQ.
2889   // To make sure that the calculated root is not greater than the exact
2890   // one, subtract SQ+1 when calculating the low root (for inexact value
2891   // of SQ).
2892   if (PickLow)
2893     APInt::sdivrem(-B - (SQ+InexactSQ), TwoA, X, Rem);
2894   else
2895     APInt::sdivrem(-B + SQ, TwoA, X, Rem);
2896 
2897   // The updated coefficients should be such that the (exact) solution is
2898   // positive. Since APInt division rounds towards 0, the calculated one
2899   // can be 0, but cannot be negative.
2900   assert(X.isNonNegative() && "Solution should be non-negative");
2901 
2902   if (!InexactSQ && Rem.isNullValue()) {
2903     LLVM_DEBUG(dbgs() << __func__ << ": solution (root): " << X << '\n');
2904     return X;
2905   }
2906 
2907   assert((SQ*SQ).sle(D) && "SQ = |_sqrt(D)_|, so SQ*SQ <= D");
2908   // The exact value of the square root of D should be between SQ and SQ+1.
2909   // This implies that the solution should be between that corresponding to
2910   // SQ (i.e. X) and that corresponding to SQ+1.
2911   //
2912   // The calculated X cannot be greater than the exact (real) solution.
2913   // Actually it must be strictly less than the exact solution, while
2914   // X+1 will be greater than or equal to it.
2915 
2916   APInt VX = (A*X + B)*X + C;
2917   APInt VY = VX + TwoA*X + A + B;
2918   bool SignChange = VX.isNegative() != VY.isNegative() ||
2919                     VX.isNullValue() != VY.isNullValue();
2920   // If the sign did not change between X and X+1, X is not a valid solution.
2921   // This could happen when the actual (exact) roots don't have an integer
2922   // between them, so they would both be contained between X and X+1.
2923   if (!SignChange) {
2924     LLVM_DEBUG(dbgs() << __func__ << ": no valid solution\n");
2925     return None;
2926   }
2927 
2928   X += 1;
2929   LLVM_DEBUG(dbgs() << __func__ << ": solution (wrap): " << X << '\n');
2930   return X;
2931 }
2932