1 //===-- APInt.cpp - Implement APInt class ---------------------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements a class to represent arbitrary precision integer
11 // constant values and provide a variety of arithmetic operations on them.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "llvm/ADT/APInt.h"
16 #include "llvm/ADT/ArrayRef.h"
17 #include "llvm/ADT/FoldingSet.h"
18 #include "llvm/ADT/Hashing.h"
19 #include "llvm/ADT/Optional.h"
20 #include "llvm/ADT/SmallString.h"
21 #include "llvm/ADT/StringRef.h"
22 #include "llvm/Config/llvm-config.h"
23 #include "llvm/Support/Debug.h"
24 #include "llvm/Support/ErrorHandling.h"
25 #include "llvm/Support/MathExtras.h"
26 #include "llvm/Support/raw_ostream.h"
27 #include <climits>
28 #include <cmath>
29 #include <cstdlib>
30 #include <cstring>
31 using namespace llvm;
32 
33 #define DEBUG_TYPE "apint"
34 
35 /// A utility function for allocating memory, checking for allocation failures,
36 /// and ensuring the contents are zeroed.
37 inline static uint64_t* getClearedMemory(unsigned numWords) {
38   uint64_t *result = new uint64_t[numWords];
39   memset(result, 0, numWords * sizeof(uint64_t));
40   return result;
41 }
42 
43 /// A utility function for allocating memory and checking for allocation
44 /// failure.  The content is not zeroed.
45 inline static uint64_t* getMemory(unsigned numWords) {
46   return new uint64_t[numWords];
47 }
48 
49 /// A utility function that converts a character to a digit.
50 inline static unsigned getDigit(char cdigit, uint8_t radix) {
51   unsigned r;
52 
53   if (radix == 16 || radix == 36) {
54     r = cdigit - '0';
55     if (r <= 9)
56       return r;
57 
58     r = cdigit - 'A';
59     if (r <= radix - 11U)
60       return r + 10;
61 
62     r = cdigit - 'a';
63     if (r <= radix - 11U)
64       return r + 10;
65 
66     radix = 10;
67   }
68 
69   r = cdigit - '0';
70   if (r < radix)
71     return r;
72 
73   return -1U;
74 }
75 
76 
77 void APInt::initSlowCase(uint64_t val, bool isSigned) {
78   U.pVal = getClearedMemory(getNumWords());
79   U.pVal[0] = val;
80   if (isSigned && int64_t(val) < 0)
81     for (unsigned i = 1; i < getNumWords(); ++i)
82       U.pVal[i] = WORD_MAX;
83   clearUnusedBits();
84 }
85 
86 void APInt::initSlowCase(const APInt& that) {
87   U.pVal = getMemory(getNumWords());
88   memcpy(U.pVal, that.U.pVal, getNumWords() * APINT_WORD_SIZE);
89 }
90 
91 void APInt::initFromArray(ArrayRef<uint64_t> bigVal) {
92   assert(BitWidth && "Bitwidth too small");
93   assert(bigVal.data() && "Null pointer detected!");
94   if (isSingleWord())
95     U.VAL = bigVal[0];
96   else {
97     // Get memory, cleared to 0
98     U.pVal = getClearedMemory(getNumWords());
99     // Calculate the number of words to copy
100     unsigned words = std::min<unsigned>(bigVal.size(), getNumWords());
101     // Copy the words from bigVal to pVal
102     memcpy(U.pVal, bigVal.data(), words * APINT_WORD_SIZE);
103   }
104   // Make sure unused high bits are cleared
105   clearUnusedBits();
106 }
107 
108 APInt::APInt(unsigned numBits, ArrayRef<uint64_t> bigVal)
109   : BitWidth(numBits) {
110   initFromArray(bigVal);
111 }
112 
113 APInt::APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[])
114   : BitWidth(numBits) {
115   initFromArray(makeArrayRef(bigVal, numWords));
116 }
117 
118 APInt::APInt(unsigned numbits, StringRef Str, uint8_t radix)
119   : BitWidth(numbits) {
120   assert(BitWidth && "Bitwidth too small");
121   fromString(numbits, Str, radix);
122 }
123 
124 void APInt::reallocate(unsigned NewBitWidth) {
125   // If the number of words is the same we can just change the width and stop.
126   if (getNumWords() == getNumWords(NewBitWidth)) {
127     BitWidth = NewBitWidth;
128     return;
129   }
130 
131   // If we have an allocation, delete it.
132   if (!isSingleWord())
133     delete [] U.pVal;
134 
135   // Update BitWidth.
136   BitWidth = NewBitWidth;
137 
138   // If we are supposed to have an allocation, create it.
139   if (!isSingleWord())
140     U.pVal = getMemory(getNumWords());
141 }
142 
143 void APInt::AssignSlowCase(const APInt& RHS) {
144   // Don't do anything for X = X
145   if (this == &RHS)
146     return;
147 
148   // Adjust the bit width and handle allocations as necessary.
149   reallocate(RHS.getBitWidth());
150 
151   // Copy the data.
152   if (isSingleWord())
153     U.VAL = RHS.U.VAL;
154   else
155     memcpy(U.pVal, RHS.U.pVal, getNumWords() * APINT_WORD_SIZE);
156 }
157 
158 /// This method 'profiles' an APInt for use with FoldingSet.
159 void APInt::Profile(FoldingSetNodeID& ID) const {
160   ID.AddInteger(BitWidth);
161 
162   if (isSingleWord()) {
163     ID.AddInteger(U.VAL);
164     return;
165   }
166 
167   unsigned NumWords = getNumWords();
168   for (unsigned i = 0; i < NumWords; ++i)
169     ID.AddInteger(U.pVal[i]);
170 }
171 
172 /// Prefix increment operator. Increments the APInt by one.
173 APInt& APInt::operator++() {
174   if (isSingleWord())
175     ++U.VAL;
176   else
177     tcIncrement(U.pVal, getNumWords());
178   return clearUnusedBits();
179 }
180 
181 /// Prefix decrement operator. Decrements the APInt by one.
182 APInt& APInt::operator--() {
183   if (isSingleWord())
184     --U.VAL;
185   else
186     tcDecrement(U.pVal, getNumWords());
187   return clearUnusedBits();
188 }
189 
190 /// Adds the RHS APint to this APInt.
191 /// @returns this, after addition of RHS.
192 /// Addition assignment operator.
193 APInt& APInt::operator+=(const APInt& RHS) {
194   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
195   if (isSingleWord())
196     U.VAL += RHS.U.VAL;
197   else
198     tcAdd(U.pVal, RHS.U.pVal, 0, getNumWords());
199   return clearUnusedBits();
200 }
201 
202 APInt& APInt::operator+=(uint64_t RHS) {
203   if (isSingleWord())
204     U.VAL += RHS;
205   else
206     tcAddPart(U.pVal, RHS, getNumWords());
207   return clearUnusedBits();
208 }
209 
210 /// Subtracts the RHS APInt from this APInt
211 /// @returns this, after subtraction
212 /// Subtraction assignment operator.
213 APInt& APInt::operator-=(const APInt& RHS) {
214   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
215   if (isSingleWord())
216     U.VAL -= RHS.U.VAL;
217   else
218     tcSubtract(U.pVal, RHS.U.pVal, 0, getNumWords());
219   return clearUnusedBits();
220 }
221 
222 APInt& APInt::operator-=(uint64_t RHS) {
223   if (isSingleWord())
224     U.VAL -= RHS;
225   else
226     tcSubtractPart(U.pVal, RHS, getNumWords());
227   return clearUnusedBits();
228 }
229 
230 APInt APInt::operator*(const APInt& RHS) const {
231   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
232   if (isSingleWord())
233     return APInt(BitWidth, U.VAL * RHS.U.VAL);
234 
235   APInt Result(getMemory(getNumWords()), getBitWidth());
236 
237   tcMultiply(Result.U.pVal, U.pVal, RHS.U.pVal, getNumWords());
238 
239   Result.clearUnusedBits();
240   return Result;
241 }
242 
243 void APInt::AndAssignSlowCase(const APInt& RHS) {
244   tcAnd(U.pVal, RHS.U.pVal, getNumWords());
245 }
246 
247 void APInt::OrAssignSlowCase(const APInt& RHS) {
248   tcOr(U.pVal, RHS.U.pVal, getNumWords());
249 }
250 
251 void APInt::XorAssignSlowCase(const APInt& RHS) {
252   tcXor(U.pVal, RHS.U.pVal, getNumWords());
253 }
254 
255 APInt& APInt::operator*=(const APInt& RHS) {
256   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
257   *this = *this * RHS;
258   return *this;
259 }
260 
261 APInt& APInt::operator*=(uint64_t RHS) {
262   if (isSingleWord()) {
263     U.VAL *= RHS;
264   } else {
265     unsigned NumWords = getNumWords();
266     tcMultiplyPart(U.pVal, U.pVal, RHS, 0, NumWords, NumWords, false);
267   }
268   return clearUnusedBits();
269 }
270 
271 bool APInt::EqualSlowCase(const APInt& RHS) const {
272   return std::equal(U.pVal, U.pVal + getNumWords(), RHS.U.pVal);
273 }
274 
275 int APInt::compare(const APInt& RHS) const {
276   assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
277   if (isSingleWord())
278     return U.VAL < RHS.U.VAL ? -1 : U.VAL > RHS.U.VAL;
279 
280   return tcCompare(U.pVal, RHS.U.pVal, getNumWords());
281 }
282 
283 int APInt::compareSigned(const APInt& RHS) const {
284   assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
285   if (isSingleWord()) {
286     int64_t lhsSext = SignExtend64(U.VAL, BitWidth);
287     int64_t rhsSext = SignExtend64(RHS.U.VAL, BitWidth);
288     return lhsSext < rhsSext ? -1 : lhsSext > rhsSext;
289   }
290 
291   bool lhsNeg = isNegative();
292   bool rhsNeg = RHS.isNegative();
293 
294   // If the sign bits don't match, then (LHS < RHS) if LHS is negative
295   if (lhsNeg != rhsNeg)
296     return lhsNeg ? -1 : 1;
297 
298   // Otherwise we can just use an unsigned comparison, because even negative
299   // numbers compare correctly this way if both have the same signed-ness.
300   return tcCompare(U.pVal, RHS.U.pVal, getNumWords());
301 }
302 
303 void APInt::setBitsSlowCase(unsigned loBit, unsigned hiBit) {
304   unsigned loWord = whichWord(loBit);
305   unsigned hiWord = whichWord(hiBit);
306 
307   // Create an initial mask for the low word with zeros below loBit.
308   uint64_t loMask = WORD_MAX << whichBit(loBit);
309 
310   // If hiBit is not aligned, we need a high mask.
311   unsigned hiShiftAmt = whichBit(hiBit);
312   if (hiShiftAmt != 0) {
313     // Create a high mask with zeros above hiBit.
314     uint64_t hiMask = WORD_MAX >> (APINT_BITS_PER_WORD - hiShiftAmt);
315     // If loWord and hiWord are equal, then we combine the masks. Otherwise,
316     // set the bits in hiWord.
317     if (hiWord == loWord)
318       loMask &= hiMask;
319     else
320       U.pVal[hiWord] |= hiMask;
321   }
322   // Apply the mask to the low word.
323   U.pVal[loWord] |= loMask;
324 
325   // Fill any words between loWord and hiWord with all ones.
326   for (unsigned word = loWord + 1; word < hiWord; ++word)
327     U.pVal[word] = WORD_MAX;
328 }
329 
330 /// Toggle every bit to its opposite value.
331 void APInt::flipAllBitsSlowCase() {
332   tcComplement(U.pVal, getNumWords());
333   clearUnusedBits();
334 }
335 
336 /// Toggle a given bit to its opposite value whose position is given
337 /// as "bitPosition".
338 /// Toggles a given bit to its opposite value.
339 void APInt::flipBit(unsigned bitPosition) {
340   assert(bitPosition < BitWidth && "Out of the bit-width range!");
341   if ((*this)[bitPosition]) clearBit(bitPosition);
342   else setBit(bitPosition);
343 }
344 
345 void APInt::insertBits(const APInt &subBits, unsigned bitPosition) {
346   unsigned subBitWidth = subBits.getBitWidth();
347   assert(0 < subBitWidth && (subBitWidth + bitPosition) <= BitWidth &&
348          "Illegal bit insertion");
349 
350   // Insertion is a direct copy.
351   if (subBitWidth == BitWidth) {
352     *this = subBits;
353     return;
354   }
355 
356   // Single word result can be done as a direct bitmask.
357   if (isSingleWord()) {
358     uint64_t mask = WORD_MAX >> (APINT_BITS_PER_WORD - subBitWidth);
359     U.VAL &= ~(mask << bitPosition);
360     U.VAL |= (subBits.U.VAL << bitPosition);
361     return;
362   }
363 
364   unsigned loBit = whichBit(bitPosition);
365   unsigned loWord = whichWord(bitPosition);
366   unsigned hi1Word = whichWord(bitPosition + subBitWidth - 1);
367 
368   // Insertion within a single word can be done as a direct bitmask.
369   if (loWord == hi1Word) {
370     uint64_t mask = WORD_MAX >> (APINT_BITS_PER_WORD - subBitWidth);
371     U.pVal[loWord] &= ~(mask << loBit);
372     U.pVal[loWord] |= (subBits.U.VAL << loBit);
373     return;
374   }
375 
376   // Insert on word boundaries.
377   if (loBit == 0) {
378     // Direct copy whole words.
379     unsigned numWholeSubWords = subBitWidth / APINT_BITS_PER_WORD;
380     memcpy(U.pVal + loWord, subBits.getRawData(),
381            numWholeSubWords * APINT_WORD_SIZE);
382 
383     // Mask+insert remaining bits.
384     unsigned remainingBits = subBitWidth % APINT_BITS_PER_WORD;
385     if (remainingBits != 0) {
386       uint64_t mask = WORD_MAX >> (APINT_BITS_PER_WORD - remainingBits);
387       U.pVal[hi1Word] &= ~mask;
388       U.pVal[hi1Word] |= subBits.getWord(subBitWidth - 1);
389     }
390     return;
391   }
392 
393   // General case - set/clear individual bits in dst based on src.
394   // TODO - there is scope for optimization here, but at the moment this code
395   // path is barely used so prefer readability over performance.
396   for (unsigned i = 0; i != subBitWidth; ++i) {
397     if (subBits[i])
398       setBit(bitPosition + i);
399     else
400       clearBit(bitPosition + i);
401   }
402 }
403 
404 APInt APInt::extractBits(unsigned numBits, unsigned bitPosition) const {
405   assert(numBits > 0 && "Can't extract zero bits");
406   assert(bitPosition < BitWidth && (numBits + bitPosition) <= BitWidth &&
407          "Illegal bit extraction");
408 
409   if (isSingleWord())
410     return APInt(numBits, U.VAL >> bitPosition);
411 
412   unsigned loBit = whichBit(bitPosition);
413   unsigned loWord = whichWord(bitPosition);
414   unsigned hiWord = whichWord(bitPosition + numBits - 1);
415 
416   // Single word result extracting bits from a single word source.
417   if (loWord == hiWord)
418     return APInt(numBits, U.pVal[loWord] >> loBit);
419 
420   // Extracting bits that start on a source word boundary can be done
421   // as a fast memory copy.
422   if (loBit == 0)
423     return APInt(numBits, makeArrayRef(U.pVal + loWord, 1 + hiWord - loWord));
424 
425   // General case - shift + copy source words directly into place.
426   APInt Result(numBits, 0);
427   unsigned NumSrcWords = getNumWords();
428   unsigned NumDstWords = Result.getNumWords();
429 
430   uint64_t *DestPtr = Result.isSingleWord() ? &Result.U.VAL : Result.U.pVal;
431   for (unsigned word = 0; word < NumDstWords; ++word) {
432     uint64_t w0 = U.pVal[loWord + word];
433     uint64_t w1 =
434         (loWord + word + 1) < NumSrcWords ? U.pVal[loWord + word + 1] : 0;
435     DestPtr[word] = (w0 >> loBit) | (w1 << (APINT_BITS_PER_WORD - loBit));
436   }
437 
438   return Result.clearUnusedBits();
439 }
440 
441 unsigned APInt::getBitsNeeded(StringRef str, uint8_t radix) {
442   assert(!str.empty() && "Invalid string length");
443   assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 ||
444           radix == 36) &&
445          "Radix should be 2, 8, 10, 16, or 36!");
446 
447   size_t slen = str.size();
448 
449   // Each computation below needs to know if it's negative.
450   StringRef::iterator p = str.begin();
451   unsigned isNegative = *p == '-';
452   if (*p == '-' || *p == '+') {
453     p++;
454     slen--;
455     assert(slen && "String is only a sign, needs a value.");
456   }
457 
458   // For radixes of power-of-two values, the bits required is accurately and
459   // easily computed
460   if (radix == 2)
461     return slen + isNegative;
462   if (radix == 8)
463     return slen * 3 + isNegative;
464   if (radix == 16)
465     return slen * 4 + isNegative;
466 
467   // FIXME: base 36
468 
469   // This is grossly inefficient but accurate. We could probably do something
470   // with a computation of roughly slen*64/20 and then adjust by the value of
471   // the first few digits. But, I'm not sure how accurate that could be.
472 
473   // Compute a sufficient number of bits that is always large enough but might
474   // be too large. This avoids the assertion in the constructor. This
475   // calculation doesn't work appropriately for the numbers 0-9, so just use 4
476   // bits in that case.
477   unsigned sufficient
478     = radix == 10? (slen == 1 ? 4 : slen * 64/18)
479                  : (slen == 1 ? 7 : slen * 16/3);
480 
481   // Convert to the actual binary value.
482   APInt tmp(sufficient, StringRef(p, slen), radix);
483 
484   // Compute how many bits are required. If the log is infinite, assume we need
485   // just bit.
486   unsigned log = tmp.logBase2();
487   if (log == (unsigned)-1) {
488     return isNegative + 1;
489   } else {
490     return isNegative + log + 1;
491   }
492 }
493 
494 hash_code llvm::hash_value(const APInt &Arg) {
495   if (Arg.isSingleWord())
496     return hash_combine(Arg.U.VAL);
497 
498   return hash_combine_range(Arg.U.pVal, Arg.U.pVal + Arg.getNumWords());
499 }
500 
501 bool APInt::isSplat(unsigned SplatSizeInBits) const {
502   assert(getBitWidth() % SplatSizeInBits == 0 &&
503          "SplatSizeInBits must divide width!");
504   // We can check that all parts of an integer are equal by making use of a
505   // little trick: rotate and check if it's still the same value.
506   return *this == rotl(SplatSizeInBits);
507 }
508 
509 /// This function returns the high "numBits" bits of this APInt.
510 APInt APInt::getHiBits(unsigned numBits) const {
511   return this->lshr(BitWidth - numBits);
512 }
513 
514 /// This function returns the low "numBits" bits of this APInt.
515 APInt APInt::getLoBits(unsigned numBits) const {
516   APInt Result(getLowBitsSet(BitWidth, numBits));
517   Result &= *this;
518   return Result;
519 }
520 
521 /// Return a value containing V broadcasted over NewLen bits.
522 APInt APInt::getSplat(unsigned NewLen, const APInt &V) {
523   assert(NewLen >= V.getBitWidth() && "Can't splat to smaller bit width!");
524 
525   APInt Val = V.zextOrSelf(NewLen);
526   for (unsigned I = V.getBitWidth(); I < NewLen; I <<= 1)
527     Val |= Val << I;
528 
529   return Val;
530 }
531 
532 unsigned APInt::countLeadingZerosSlowCase() const {
533   unsigned Count = 0;
534   for (int i = getNumWords()-1; i >= 0; --i) {
535     uint64_t V = U.pVal[i];
536     if (V == 0)
537       Count += APINT_BITS_PER_WORD;
538     else {
539       Count += llvm::countLeadingZeros(V);
540       break;
541     }
542   }
543   // Adjust for unused bits in the most significant word (they are zero).
544   unsigned Mod = BitWidth % APINT_BITS_PER_WORD;
545   Count -= Mod > 0 ? APINT_BITS_PER_WORD - Mod : 0;
546   return Count;
547 }
548 
549 unsigned APInt::countLeadingOnesSlowCase() const {
550   unsigned highWordBits = BitWidth % APINT_BITS_PER_WORD;
551   unsigned shift;
552   if (!highWordBits) {
553     highWordBits = APINT_BITS_PER_WORD;
554     shift = 0;
555   } else {
556     shift = APINT_BITS_PER_WORD - highWordBits;
557   }
558   int i = getNumWords() - 1;
559   unsigned Count = llvm::countLeadingOnes(U.pVal[i] << shift);
560   if (Count == highWordBits) {
561     for (i--; i >= 0; --i) {
562       if (U.pVal[i] == WORD_MAX)
563         Count += APINT_BITS_PER_WORD;
564       else {
565         Count += llvm::countLeadingOnes(U.pVal[i]);
566         break;
567       }
568     }
569   }
570   return Count;
571 }
572 
573 unsigned APInt::countTrailingZerosSlowCase() const {
574   unsigned Count = 0;
575   unsigned i = 0;
576   for (; i < getNumWords() && U.pVal[i] == 0; ++i)
577     Count += APINT_BITS_PER_WORD;
578   if (i < getNumWords())
579     Count += llvm::countTrailingZeros(U.pVal[i]);
580   return std::min(Count, BitWidth);
581 }
582 
583 unsigned APInt::countTrailingOnesSlowCase() const {
584   unsigned Count = 0;
585   unsigned i = 0;
586   for (; i < getNumWords() && U.pVal[i] == WORD_MAX; ++i)
587     Count += APINT_BITS_PER_WORD;
588   if (i < getNumWords())
589     Count += llvm::countTrailingOnes(U.pVal[i]);
590   assert(Count <= BitWidth);
591   return Count;
592 }
593 
594 unsigned APInt::countPopulationSlowCase() const {
595   unsigned Count = 0;
596   for (unsigned i = 0; i < getNumWords(); ++i)
597     Count += llvm::countPopulation(U.pVal[i]);
598   return Count;
599 }
600 
601 bool APInt::intersectsSlowCase(const APInt &RHS) const {
602   for (unsigned i = 0, e = getNumWords(); i != e; ++i)
603     if ((U.pVal[i] & RHS.U.pVal[i]) != 0)
604       return true;
605 
606   return false;
607 }
608 
609 bool APInt::isSubsetOfSlowCase(const APInt &RHS) const {
610   for (unsigned i = 0, e = getNumWords(); i != e; ++i)
611     if ((U.pVal[i] & ~RHS.U.pVal[i]) != 0)
612       return false;
613 
614   return true;
615 }
616 
617 APInt APInt::byteSwap() const {
618   assert(BitWidth >= 16 && BitWidth % 16 == 0 && "Cannot byteswap!");
619   if (BitWidth == 16)
620     return APInt(BitWidth, ByteSwap_16(uint16_t(U.VAL)));
621   if (BitWidth == 32)
622     return APInt(BitWidth, ByteSwap_32(unsigned(U.VAL)));
623   if (BitWidth == 48) {
624     unsigned Tmp1 = unsigned(U.VAL >> 16);
625     Tmp1 = ByteSwap_32(Tmp1);
626     uint16_t Tmp2 = uint16_t(U.VAL);
627     Tmp2 = ByteSwap_16(Tmp2);
628     return APInt(BitWidth, (uint64_t(Tmp2) << 32) | Tmp1);
629   }
630   if (BitWidth == 64)
631     return APInt(BitWidth, ByteSwap_64(U.VAL));
632 
633   APInt Result(getNumWords() * APINT_BITS_PER_WORD, 0);
634   for (unsigned I = 0, N = getNumWords(); I != N; ++I)
635     Result.U.pVal[I] = ByteSwap_64(U.pVal[N - I - 1]);
636   if (Result.BitWidth != BitWidth) {
637     Result.lshrInPlace(Result.BitWidth - BitWidth);
638     Result.BitWidth = BitWidth;
639   }
640   return Result;
641 }
642 
643 APInt APInt::reverseBits() const {
644   switch (BitWidth) {
645   case 64:
646     return APInt(BitWidth, llvm::reverseBits<uint64_t>(U.VAL));
647   case 32:
648     return APInt(BitWidth, llvm::reverseBits<uint32_t>(U.VAL));
649   case 16:
650     return APInt(BitWidth, llvm::reverseBits<uint16_t>(U.VAL));
651   case 8:
652     return APInt(BitWidth, llvm::reverseBits<uint8_t>(U.VAL));
653   default:
654     break;
655   }
656 
657   APInt Val(*this);
658   APInt Reversed(BitWidth, 0);
659   unsigned S = BitWidth;
660 
661   for (; Val != 0; Val.lshrInPlace(1)) {
662     Reversed <<= 1;
663     Reversed |= Val[0];
664     --S;
665   }
666 
667   Reversed <<= S;
668   return Reversed;
669 }
670 
671 APInt llvm::APIntOps::GreatestCommonDivisor(APInt A, APInt B) {
672   // Fast-path a common case.
673   if (A == B) return A;
674 
675   // Corner cases: if either operand is zero, the other is the gcd.
676   if (!A) return B;
677   if (!B) return A;
678 
679   // Count common powers of 2 and remove all other powers of 2.
680   unsigned Pow2;
681   {
682     unsigned Pow2_A = A.countTrailingZeros();
683     unsigned Pow2_B = B.countTrailingZeros();
684     if (Pow2_A > Pow2_B) {
685       A.lshrInPlace(Pow2_A - Pow2_B);
686       Pow2 = Pow2_B;
687     } else if (Pow2_B > Pow2_A) {
688       B.lshrInPlace(Pow2_B - Pow2_A);
689       Pow2 = Pow2_A;
690     } else {
691       Pow2 = Pow2_A;
692     }
693   }
694 
695   // Both operands are odd multiples of 2^Pow_2:
696   //
697   //   gcd(a, b) = gcd(|a - b| / 2^i, min(a, b))
698   //
699   // This is a modified version of Stein's algorithm, taking advantage of
700   // efficient countTrailingZeros().
701   while (A != B) {
702     if (A.ugt(B)) {
703       A -= B;
704       A.lshrInPlace(A.countTrailingZeros() - Pow2);
705     } else {
706       B -= A;
707       B.lshrInPlace(B.countTrailingZeros() - Pow2);
708     }
709   }
710 
711   return A;
712 }
713 
714 APInt llvm::APIntOps::RoundDoubleToAPInt(double Double, unsigned width) {
715   union {
716     double D;
717     uint64_t I;
718   } T;
719   T.D = Double;
720 
721   // Get the sign bit from the highest order bit
722   bool isNeg = T.I >> 63;
723 
724   // Get the 11-bit exponent and adjust for the 1023 bit bias
725   int64_t exp = ((T.I >> 52) & 0x7ff) - 1023;
726 
727   // If the exponent is negative, the value is < 0 so just return 0.
728   if (exp < 0)
729     return APInt(width, 0u);
730 
731   // Extract the mantissa by clearing the top 12 bits (sign + exponent).
732   uint64_t mantissa = (T.I & (~0ULL >> 12)) | 1ULL << 52;
733 
734   // If the exponent doesn't shift all bits out of the mantissa
735   if (exp < 52)
736     return isNeg ? -APInt(width, mantissa >> (52 - exp)) :
737                     APInt(width, mantissa >> (52 - exp));
738 
739   // If the client didn't provide enough bits for us to shift the mantissa into
740   // then the result is undefined, just return 0
741   if (width <= exp - 52)
742     return APInt(width, 0);
743 
744   // Otherwise, we have to shift the mantissa bits up to the right location
745   APInt Tmp(width, mantissa);
746   Tmp <<= (unsigned)exp - 52;
747   return isNeg ? -Tmp : Tmp;
748 }
749 
750 /// This function converts this APInt to a double.
751 /// The layout for double is as following (IEEE Standard 754):
752 ///  --------------------------------------
753 /// |  Sign    Exponent    Fraction    Bias |
754 /// |-------------------------------------- |
755 /// |  1[63]   11[62-52]   52[51-00]   1023 |
756 ///  --------------------------------------
757 double APInt::roundToDouble(bool isSigned) const {
758 
759   // Handle the simple case where the value is contained in one uint64_t.
760   // It is wrong to optimize getWord(0) to VAL; there might be more than one word.
761   if (isSingleWord() || getActiveBits() <= APINT_BITS_PER_WORD) {
762     if (isSigned) {
763       int64_t sext = SignExtend64(getWord(0), BitWidth);
764       return double(sext);
765     } else
766       return double(getWord(0));
767   }
768 
769   // Determine if the value is negative.
770   bool isNeg = isSigned ? (*this)[BitWidth-1] : false;
771 
772   // Construct the absolute value if we're negative.
773   APInt Tmp(isNeg ? -(*this) : (*this));
774 
775   // Figure out how many bits we're using.
776   unsigned n = Tmp.getActiveBits();
777 
778   // The exponent (without bias normalization) is just the number of bits
779   // we are using. Note that the sign bit is gone since we constructed the
780   // absolute value.
781   uint64_t exp = n;
782 
783   // Return infinity for exponent overflow
784   if (exp > 1023) {
785     if (!isSigned || !isNeg)
786       return std::numeric_limits<double>::infinity();
787     else
788       return -std::numeric_limits<double>::infinity();
789   }
790   exp += 1023; // Increment for 1023 bias
791 
792   // Number of bits in mantissa is 52. To obtain the mantissa value, we must
793   // extract the high 52 bits from the correct words in pVal.
794   uint64_t mantissa;
795   unsigned hiWord = whichWord(n-1);
796   if (hiWord == 0) {
797     mantissa = Tmp.U.pVal[0];
798     if (n > 52)
799       mantissa >>= n - 52; // shift down, we want the top 52 bits.
800   } else {
801     assert(hiWord > 0 && "huh?");
802     uint64_t hibits = Tmp.U.pVal[hiWord] << (52 - n % APINT_BITS_PER_WORD);
803     uint64_t lobits = Tmp.U.pVal[hiWord-1] >> (11 + n % APINT_BITS_PER_WORD);
804     mantissa = hibits | lobits;
805   }
806 
807   // The leading bit of mantissa is implicit, so get rid of it.
808   uint64_t sign = isNeg ? (1ULL << (APINT_BITS_PER_WORD - 1)) : 0;
809   union {
810     double D;
811     uint64_t I;
812   } T;
813   T.I = sign | (exp << 52) | mantissa;
814   return T.D;
815 }
816 
817 // Truncate to new width.
818 APInt APInt::trunc(unsigned width) const {
819   assert(width < BitWidth && "Invalid APInt Truncate request");
820   assert(width && "Can't truncate to 0 bits");
821 
822   if (width <= APINT_BITS_PER_WORD)
823     return APInt(width, getRawData()[0]);
824 
825   APInt Result(getMemory(getNumWords(width)), width);
826 
827   // Copy full words.
828   unsigned i;
829   for (i = 0; i != width / APINT_BITS_PER_WORD; i++)
830     Result.U.pVal[i] = U.pVal[i];
831 
832   // Truncate and copy any partial word.
833   unsigned bits = (0 - width) % APINT_BITS_PER_WORD;
834   if (bits != 0)
835     Result.U.pVal[i] = U.pVal[i] << bits >> bits;
836 
837   return Result;
838 }
839 
840 // Sign extend to a new width.
841 APInt APInt::sext(unsigned Width) const {
842   assert(Width > BitWidth && "Invalid APInt SignExtend request");
843 
844   if (Width <= APINT_BITS_PER_WORD)
845     return APInt(Width, SignExtend64(U.VAL, BitWidth));
846 
847   APInt Result(getMemory(getNumWords(Width)), Width);
848 
849   // Copy words.
850   std::memcpy(Result.U.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE);
851 
852   // Sign extend the last word since there may be unused bits in the input.
853   Result.U.pVal[getNumWords() - 1] =
854       SignExtend64(Result.U.pVal[getNumWords() - 1],
855                    ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1);
856 
857   // Fill with sign bits.
858   std::memset(Result.U.pVal + getNumWords(), isNegative() ? -1 : 0,
859               (Result.getNumWords() - getNumWords()) * APINT_WORD_SIZE);
860   Result.clearUnusedBits();
861   return Result;
862 }
863 
864 //  Zero extend to a new width.
865 APInt APInt::zext(unsigned width) const {
866   assert(width > BitWidth && "Invalid APInt ZeroExtend request");
867 
868   if (width <= APINT_BITS_PER_WORD)
869     return APInt(width, U.VAL);
870 
871   APInt Result(getMemory(getNumWords(width)), width);
872 
873   // Copy words.
874   std::memcpy(Result.U.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE);
875 
876   // Zero remaining words.
877   std::memset(Result.U.pVal + getNumWords(), 0,
878               (Result.getNumWords() - getNumWords()) * APINT_WORD_SIZE);
879 
880   return Result;
881 }
882 
883 APInt APInt::zextOrTrunc(unsigned width) const {
884   if (BitWidth < width)
885     return zext(width);
886   if (BitWidth > width)
887     return trunc(width);
888   return *this;
889 }
890 
891 APInt APInt::sextOrTrunc(unsigned width) const {
892   if (BitWidth < width)
893     return sext(width);
894   if (BitWidth > width)
895     return trunc(width);
896   return *this;
897 }
898 
899 APInt APInt::zextOrSelf(unsigned width) const {
900   if (BitWidth < width)
901     return zext(width);
902   return *this;
903 }
904 
905 APInt APInt::sextOrSelf(unsigned width) const {
906   if (BitWidth < width)
907     return sext(width);
908   return *this;
909 }
910 
911 /// Arithmetic right-shift this APInt by shiftAmt.
912 /// Arithmetic right-shift function.
913 void APInt::ashrInPlace(const APInt &shiftAmt) {
914   ashrInPlace((unsigned)shiftAmt.getLimitedValue(BitWidth));
915 }
916 
917 /// Arithmetic right-shift this APInt by shiftAmt.
918 /// Arithmetic right-shift function.
919 void APInt::ashrSlowCase(unsigned ShiftAmt) {
920   // Don't bother performing a no-op shift.
921   if (!ShiftAmt)
922     return;
923 
924   // Save the original sign bit for later.
925   bool Negative = isNegative();
926 
927   // WordShift is the inter-part shift; BitShift is intra-part shift.
928   unsigned WordShift = ShiftAmt / APINT_BITS_PER_WORD;
929   unsigned BitShift = ShiftAmt % APINT_BITS_PER_WORD;
930 
931   unsigned WordsToMove = getNumWords() - WordShift;
932   if (WordsToMove != 0) {
933     // Sign extend the last word to fill in the unused bits.
934     U.pVal[getNumWords() - 1] = SignExtend64(
935         U.pVal[getNumWords() - 1], ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1);
936 
937     // Fastpath for moving by whole words.
938     if (BitShift == 0) {
939       std::memmove(U.pVal, U.pVal + WordShift, WordsToMove * APINT_WORD_SIZE);
940     } else {
941       // Move the words containing significant bits.
942       for (unsigned i = 0; i != WordsToMove - 1; ++i)
943         U.pVal[i] = (U.pVal[i + WordShift] >> BitShift) |
944                     (U.pVal[i + WordShift + 1] << (APINT_BITS_PER_WORD - BitShift));
945 
946       // Handle the last word which has no high bits to copy.
947       U.pVal[WordsToMove - 1] = U.pVal[WordShift + WordsToMove - 1] >> BitShift;
948       // Sign extend one more time.
949       U.pVal[WordsToMove - 1] =
950           SignExtend64(U.pVal[WordsToMove - 1], APINT_BITS_PER_WORD - BitShift);
951     }
952   }
953 
954   // Fill in the remainder based on the original sign.
955   std::memset(U.pVal + WordsToMove, Negative ? -1 : 0,
956               WordShift * APINT_WORD_SIZE);
957   clearUnusedBits();
958 }
959 
960 /// Logical right-shift this APInt by shiftAmt.
961 /// Logical right-shift function.
962 void APInt::lshrInPlace(const APInt &shiftAmt) {
963   lshrInPlace((unsigned)shiftAmt.getLimitedValue(BitWidth));
964 }
965 
966 /// Logical right-shift this APInt by shiftAmt.
967 /// Logical right-shift function.
968 void APInt::lshrSlowCase(unsigned ShiftAmt) {
969   tcShiftRight(U.pVal, getNumWords(), ShiftAmt);
970 }
971 
972 /// Left-shift this APInt by shiftAmt.
973 /// Left-shift function.
974 APInt &APInt::operator<<=(const APInt &shiftAmt) {
975   // It's undefined behavior in C to shift by BitWidth or greater.
976   *this <<= (unsigned)shiftAmt.getLimitedValue(BitWidth);
977   return *this;
978 }
979 
980 void APInt::shlSlowCase(unsigned ShiftAmt) {
981   tcShiftLeft(U.pVal, getNumWords(), ShiftAmt);
982   clearUnusedBits();
983 }
984 
985 // Calculate the rotate amount modulo the bit width.
986 static unsigned rotateModulo(unsigned BitWidth, const APInt &rotateAmt) {
987   unsigned rotBitWidth = rotateAmt.getBitWidth();
988   APInt rot = rotateAmt;
989   if (rotBitWidth < BitWidth) {
990     // Extend the rotate APInt, so that the urem doesn't divide by 0.
991     // e.g. APInt(1, 32) would give APInt(1, 0).
992     rot = rotateAmt.zext(BitWidth);
993   }
994   rot = rot.urem(APInt(rot.getBitWidth(), BitWidth));
995   return rot.getLimitedValue(BitWidth);
996 }
997 
998 APInt APInt::rotl(const APInt &rotateAmt) const {
999   return rotl(rotateModulo(BitWidth, rotateAmt));
1000 }
1001 
1002 APInt APInt::rotl(unsigned rotateAmt) const {
1003   rotateAmt %= BitWidth;
1004   if (rotateAmt == 0)
1005     return *this;
1006   return shl(rotateAmt) | lshr(BitWidth - rotateAmt);
1007 }
1008 
1009 APInt APInt::rotr(const APInt &rotateAmt) const {
1010   return rotr(rotateModulo(BitWidth, rotateAmt));
1011 }
1012 
1013 APInt APInt::rotr(unsigned rotateAmt) const {
1014   rotateAmt %= BitWidth;
1015   if (rotateAmt == 0)
1016     return *this;
1017   return lshr(rotateAmt) | shl(BitWidth - rotateAmt);
1018 }
1019 
1020 // Square Root - this method computes and returns the square root of "this".
1021 // Three mechanisms are used for computation. For small values (<= 5 bits),
1022 // a table lookup is done. This gets some performance for common cases. For
1023 // values using less than 52 bits, the value is converted to double and then
1024 // the libc sqrt function is called. The result is rounded and then converted
1025 // back to a uint64_t which is then used to construct the result. Finally,
1026 // the Babylonian method for computing square roots is used.
1027 APInt APInt::sqrt() const {
1028 
1029   // Determine the magnitude of the value.
1030   unsigned magnitude = getActiveBits();
1031 
1032   // Use a fast table for some small values. This also gets rid of some
1033   // rounding errors in libc sqrt for small values.
1034   if (magnitude <= 5) {
1035     static const uint8_t results[32] = {
1036       /*     0 */ 0,
1037       /*  1- 2 */ 1, 1,
1038       /*  3- 6 */ 2, 2, 2, 2,
1039       /*  7-12 */ 3, 3, 3, 3, 3, 3,
1040       /* 13-20 */ 4, 4, 4, 4, 4, 4, 4, 4,
1041       /* 21-30 */ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1042       /*    31 */ 6
1043     };
1044     return APInt(BitWidth, results[ (isSingleWord() ? U.VAL : U.pVal[0]) ]);
1045   }
1046 
1047   // If the magnitude of the value fits in less than 52 bits (the precision of
1048   // an IEEE double precision floating point value), then we can use the
1049   // libc sqrt function which will probably use a hardware sqrt computation.
1050   // This should be faster than the algorithm below.
1051   if (magnitude < 52) {
1052     return APInt(BitWidth,
1053                  uint64_t(::round(::sqrt(double(isSingleWord() ? U.VAL
1054                                                                : U.pVal[0])))));
1055   }
1056 
1057   // Okay, all the short cuts are exhausted. We must compute it. The following
1058   // is a classical Babylonian method for computing the square root. This code
1059   // was adapted to APInt from a wikipedia article on such computations.
1060   // See http://www.wikipedia.org/ and go to the page named
1061   // Calculate_an_integer_square_root.
1062   unsigned nbits = BitWidth, i = 4;
1063   APInt testy(BitWidth, 16);
1064   APInt x_old(BitWidth, 1);
1065   APInt x_new(BitWidth, 0);
1066   APInt two(BitWidth, 2);
1067 
1068   // Select a good starting value using binary logarithms.
1069   for (;; i += 2, testy = testy.shl(2))
1070     if (i >= nbits || this->ule(testy)) {
1071       x_old = x_old.shl(i / 2);
1072       break;
1073     }
1074 
1075   // Use the Babylonian method to arrive at the integer square root:
1076   for (;;) {
1077     x_new = (this->udiv(x_old) + x_old).udiv(two);
1078     if (x_old.ule(x_new))
1079       break;
1080     x_old = x_new;
1081   }
1082 
1083   // Make sure we return the closest approximation
1084   // NOTE: The rounding calculation below is correct. It will produce an
1085   // off-by-one discrepancy with results from pari/gp. That discrepancy has been
1086   // determined to be a rounding issue with pari/gp as it begins to use a
1087   // floating point representation after 192 bits. There are no discrepancies
1088   // between this algorithm and pari/gp for bit widths < 192 bits.
1089   APInt square(x_old * x_old);
1090   APInt nextSquare((x_old + 1) * (x_old +1));
1091   if (this->ult(square))
1092     return x_old;
1093   assert(this->ule(nextSquare) && "Error in APInt::sqrt computation");
1094   APInt midpoint((nextSquare - square).udiv(two));
1095   APInt offset(*this - square);
1096   if (offset.ult(midpoint))
1097     return x_old;
1098   return x_old + 1;
1099 }
1100 
1101 /// Computes the multiplicative inverse of this APInt for a given modulo. The
1102 /// iterative extended Euclidean algorithm is used to solve for this value,
1103 /// however we simplify it to speed up calculating only the inverse, and take
1104 /// advantage of div+rem calculations. We also use some tricks to avoid copying
1105 /// (potentially large) APInts around.
1106 APInt APInt::multiplicativeInverse(const APInt& modulo) const {
1107   assert(ult(modulo) && "This APInt must be smaller than the modulo");
1108 
1109   // Using the properties listed at the following web page (accessed 06/21/08):
1110   //   http://www.numbertheory.org/php/euclid.html
1111   // (especially the properties numbered 3, 4 and 9) it can be proved that
1112   // BitWidth bits suffice for all the computations in the algorithm implemented
1113   // below. More precisely, this number of bits suffice if the multiplicative
1114   // inverse exists, but may not suffice for the general extended Euclidean
1115   // algorithm.
1116 
1117   APInt r[2] = { modulo, *this };
1118   APInt t[2] = { APInt(BitWidth, 0), APInt(BitWidth, 1) };
1119   APInt q(BitWidth, 0);
1120 
1121   unsigned i;
1122   for (i = 0; r[i^1] != 0; i ^= 1) {
1123     // An overview of the math without the confusing bit-flipping:
1124     // q = r[i-2] / r[i-1]
1125     // r[i] = r[i-2] % r[i-1]
1126     // t[i] = t[i-2] - t[i-1] * q
1127     udivrem(r[i], r[i^1], q, r[i]);
1128     t[i] -= t[i^1] * q;
1129   }
1130 
1131   // If this APInt and the modulo are not coprime, there is no multiplicative
1132   // inverse, so return 0. We check this by looking at the next-to-last
1133   // remainder, which is the gcd(*this,modulo) as calculated by the Euclidean
1134   // algorithm.
1135   if (r[i] != 1)
1136     return APInt(BitWidth, 0);
1137 
1138   // The next-to-last t is the multiplicative inverse.  However, we are
1139   // interested in a positive inverse. Calculate a positive one from a negative
1140   // one if necessary. A simple addition of the modulo suffices because
1141   // abs(t[i]) is known to be less than *this/2 (see the link above).
1142   if (t[i].isNegative())
1143     t[i] += modulo;
1144 
1145   return std::move(t[i]);
1146 }
1147 
1148 /// Calculate the magic numbers required to implement a signed integer division
1149 /// by a constant as a sequence of multiplies, adds and shifts.  Requires that
1150 /// the divisor not be 0, 1, or -1.  Taken from "Hacker's Delight", Henry S.
1151 /// Warren, Jr., chapter 10.
1152 APInt::ms APInt::magic() const {
1153   const APInt& d = *this;
1154   unsigned p;
1155   APInt ad, anc, delta, q1, r1, q2, r2, t;
1156   APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
1157   struct ms mag;
1158 
1159   ad = d.abs();
1160   t = signedMin + (d.lshr(d.getBitWidth() - 1));
1161   anc = t - 1 - t.urem(ad);   // absolute value of nc
1162   p = d.getBitWidth() - 1;    // initialize p
1163   q1 = signedMin.udiv(anc);   // initialize q1 = 2p/abs(nc)
1164   r1 = signedMin - q1*anc;    // initialize r1 = rem(2p,abs(nc))
1165   q2 = signedMin.udiv(ad);    // initialize q2 = 2p/abs(d)
1166   r2 = signedMin - q2*ad;     // initialize r2 = rem(2p,abs(d))
1167   do {
1168     p = p + 1;
1169     q1 = q1<<1;          // update q1 = 2p/abs(nc)
1170     r1 = r1<<1;          // update r1 = rem(2p/abs(nc))
1171     if (r1.uge(anc)) {  // must be unsigned comparison
1172       q1 = q1 + 1;
1173       r1 = r1 - anc;
1174     }
1175     q2 = q2<<1;          // update q2 = 2p/abs(d)
1176     r2 = r2<<1;          // update r2 = rem(2p/abs(d))
1177     if (r2.uge(ad)) {   // must be unsigned comparison
1178       q2 = q2 + 1;
1179       r2 = r2 - ad;
1180     }
1181     delta = ad - r2;
1182   } while (q1.ult(delta) || (q1 == delta && r1 == 0));
1183 
1184   mag.m = q2 + 1;
1185   if (d.isNegative()) mag.m = -mag.m;   // resulting magic number
1186   mag.s = p - d.getBitWidth();          // resulting shift
1187   return mag;
1188 }
1189 
1190 /// Calculate the magic numbers required to implement an unsigned integer
1191 /// division by a constant as a sequence of multiplies, adds and shifts.
1192 /// Requires that the divisor not be 0.  Taken from "Hacker's Delight", Henry
1193 /// S. Warren, Jr., chapter 10.
1194 /// LeadingZeros can be used to simplify the calculation if the upper bits
1195 /// of the divided value are known zero.
1196 APInt::mu APInt::magicu(unsigned LeadingZeros) const {
1197   const APInt& d = *this;
1198   unsigned p;
1199   APInt nc, delta, q1, r1, q2, r2;
1200   struct mu magu;
1201   magu.a = 0;               // initialize "add" indicator
1202   APInt allOnes = APInt::getAllOnesValue(d.getBitWidth()).lshr(LeadingZeros);
1203   APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
1204   APInt signedMax = APInt::getSignedMaxValue(d.getBitWidth());
1205 
1206   nc = allOnes - (allOnes - d).urem(d);
1207   p = d.getBitWidth() - 1;  // initialize p
1208   q1 = signedMin.udiv(nc);  // initialize q1 = 2p/nc
1209   r1 = signedMin - q1*nc;   // initialize r1 = rem(2p,nc)
1210   q2 = signedMax.udiv(d);   // initialize q2 = (2p-1)/d
1211   r2 = signedMax - q2*d;    // initialize r2 = rem((2p-1),d)
1212   do {
1213     p = p + 1;
1214     if (r1.uge(nc - r1)) {
1215       q1 = q1 + q1 + 1;  // update q1
1216       r1 = r1 + r1 - nc; // update r1
1217     }
1218     else {
1219       q1 = q1+q1; // update q1
1220       r1 = r1+r1; // update r1
1221     }
1222     if ((r2 + 1).uge(d - r2)) {
1223       if (q2.uge(signedMax)) magu.a = 1;
1224       q2 = q2+q2 + 1;     // update q2
1225       r2 = r2+r2 + 1 - d; // update r2
1226     }
1227     else {
1228       if (q2.uge(signedMin)) magu.a = 1;
1229       q2 = q2+q2;     // update q2
1230       r2 = r2+r2 + 1; // update r2
1231     }
1232     delta = d - 1 - r2;
1233   } while (p < d.getBitWidth()*2 &&
1234            (q1.ult(delta) || (q1 == delta && r1 == 0)));
1235   magu.m = q2 + 1; // resulting magic number
1236   magu.s = p - d.getBitWidth();  // resulting shift
1237   return magu;
1238 }
1239 
1240 /// Implementation of Knuth's Algorithm D (Division of nonnegative integers)
1241 /// from "Art of Computer Programming, Volume 2", section 4.3.1, p. 272. The
1242 /// variables here have the same names as in the algorithm. Comments explain
1243 /// the algorithm and any deviation from it.
1244 static void KnuthDiv(uint32_t *u, uint32_t *v, uint32_t *q, uint32_t* r,
1245                      unsigned m, unsigned n) {
1246   assert(u && "Must provide dividend");
1247   assert(v && "Must provide divisor");
1248   assert(q && "Must provide quotient");
1249   assert(u != v && u != q && v != q && "Must use different memory");
1250   assert(n>1 && "n must be > 1");
1251 
1252   // b denotes the base of the number system. In our case b is 2^32.
1253   const uint64_t b = uint64_t(1) << 32;
1254 
1255 // The DEBUG macros here tend to be spam in the debug output if you're not
1256 // debugging this code. Disable them unless KNUTH_DEBUG is defined.
1257 #pragma push_macro("LLVM_DEBUG")
1258 #ifndef KNUTH_DEBUG
1259 #undef LLVM_DEBUG
1260 #define LLVM_DEBUG(X)                                                          \
1261   do {                                                                         \
1262   } while (false)
1263 #endif
1264 
1265   LLVM_DEBUG(dbgs() << "KnuthDiv: m=" << m << " n=" << n << '\n');
1266   LLVM_DEBUG(dbgs() << "KnuthDiv: original:");
1267   LLVM_DEBUG(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1268   LLVM_DEBUG(dbgs() << " by");
1269   LLVM_DEBUG(for (int i = n; i > 0; i--) dbgs() << " " << v[i - 1]);
1270   LLVM_DEBUG(dbgs() << '\n');
1271   // D1. [Normalize.] Set d = b / (v[n-1] + 1) and multiply all the digits of
1272   // u and v by d. Note that we have taken Knuth's advice here to use a power
1273   // of 2 value for d such that d * v[n-1] >= b/2 (b is the base). A power of
1274   // 2 allows us to shift instead of multiply and it is easy to determine the
1275   // shift amount from the leading zeros.  We are basically normalizing the u
1276   // and v so that its high bits are shifted to the top of v's range without
1277   // overflow. Note that this can require an extra word in u so that u must
1278   // be of length m+n+1.
1279   unsigned shift = countLeadingZeros(v[n-1]);
1280   uint32_t v_carry = 0;
1281   uint32_t u_carry = 0;
1282   if (shift) {
1283     for (unsigned i = 0; i < m+n; ++i) {
1284       uint32_t u_tmp = u[i] >> (32 - shift);
1285       u[i] = (u[i] << shift) | u_carry;
1286       u_carry = u_tmp;
1287     }
1288     for (unsigned i = 0; i < n; ++i) {
1289       uint32_t v_tmp = v[i] >> (32 - shift);
1290       v[i] = (v[i] << shift) | v_carry;
1291       v_carry = v_tmp;
1292     }
1293   }
1294   u[m+n] = u_carry;
1295 
1296   LLVM_DEBUG(dbgs() << "KnuthDiv:   normal:");
1297   LLVM_DEBUG(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1298   LLVM_DEBUG(dbgs() << " by");
1299   LLVM_DEBUG(for (int i = n; i > 0; i--) dbgs() << " " << v[i - 1]);
1300   LLVM_DEBUG(dbgs() << '\n');
1301 
1302   // D2. [Initialize j.]  Set j to m. This is the loop counter over the places.
1303   int j = m;
1304   do {
1305     LLVM_DEBUG(dbgs() << "KnuthDiv: quotient digit #" << j << '\n');
1306     // D3. [Calculate q'.].
1307     //     Set qp = (u[j+n]*b + u[j+n-1]) / v[n-1]. (qp=qprime=q')
1308     //     Set rp = (u[j+n]*b + u[j+n-1]) % v[n-1]. (rp=rprime=r')
1309     // Now test if qp == b or qp*v[n-2] > b*rp + u[j+n-2]; if so, decrease
1310     // qp by 1, increase rp by v[n-1], and repeat this test if rp < b. The test
1311     // on v[n-2] determines at high speed most of the cases in which the trial
1312     // value qp is one too large, and it eliminates all cases where qp is two
1313     // too large.
1314     uint64_t dividend = Make_64(u[j+n], u[j+n-1]);
1315     LLVM_DEBUG(dbgs() << "KnuthDiv: dividend == " << dividend << '\n');
1316     uint64_t qp = dividend / v[n-1];
1317     uint64_t rp = dividend % v[n-1];
1318     if (qp == b || qp*v[n-2] > b*rp + u[j+n-2]) {
1319       qp--;
1320       rp += v[n-1];
1321       if (rp < b && (qp == b || qp*v[n-2] > b*rp + u[j+n-2]))
1322         qp--;
1323     }
1324     LLVM_DEBUG(dbgs() << "KnuthDiv: qp == " << qp << ", rp == " << rp << '\n');
1325 
1326     // D4. [Multiply and subtract.] Replace (u[j+n]u[j+n-1]...u[j]) with
1327     // (u[j+n]u[j+n-1]..u[j]) - qp * (v[n-1]...v[1]v[0]). This computation
1328     // consists of a simple multiplication by a one-place number, combined with
1329     // a subtraction.
1330     // The digits (u[j+n]...u[j]) should be kept positive; if the result of
1331     // this step is actually negative, (u[j+n]...u[j]) should be left as the
1332     // true value plus b**(n+1), namely as the b's complement of
1333     // the true value, and a "borrow" to the left should be remembered.
1334     int64_t borrow = 0;
1335     for (unsigned i = 0; i < n; ++i) {
1336       uint64_t p = uint64_t(qp) * uint64_t(v[i]);
1337       int64_t subres = int64_t(u[j+i]) - borrow - Lo_32(p);
1338       u[j+i] = Lo_32(subres);
1339       borrow = Hi_32(p) - Hi_32(subres);
1340       LLVM_DEBUG(dbgs() << "KnuthDiv: u[j+i] = " << u[j + i]
1341                         << ", borrow = " << borrow << '\n');
1342     }
1343     bool isNeg = u[j+n] < borrow;
1344     u[j+n] -= Lo_32(borrow);
1345 
1346     LLVM_DEBUG(dbgs() << "KnuthDiv: after subtraction:");
1347     LLVM_DEBUG(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1348     LLVM_DEBUG(dbgs() << '\n');
1349 
1350     // D5. [Test remainder.] Set q[j] = qp. If the result of step D4 was
1351     // negative, go to step D6; otherwise go on to step D7.
1352     q[j] = Lo_32(qp);
1353     if (isNeg) {
1354       // D6. [Add back]. The probability that this step is necessary is very
1355       // small, on the order of only 2/b. Make sure that test data accounts for
1356       // this possibility. Decrease q[j] by 1
1357       q[j]--;
1358       // and add (0v[n-1]...v[1]v[0]) to (u[j+n]u[j+n-1]...u[j+1]u[j]).
1359       // A carry will occur to the left of u[j+n], and it should be ignored
1360       // since it cancels with the borrow that occurred in D4.
1361       bool carry = false;
1362       for (unsigned i = 0; i < n; i++) {
1363         uint32_t limit = std::min(u[j+i],v[i]);
1364         u[j+i] += v[i] + carry;
1365         carry = u[j+i] < limit || (carry && u[j+i] == limit);
1366       }
1367       u[j+n] += carry;
1368     }
1369     LLVM_DEBUG(dbgs() << "KnuthDiv: after correction:");
1370     LLVM_DEBUG(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1371     LLVM_DEBUG(dbgs() << "\nKnuthDiv: digit result = " << q[j] << '\n');
1372 
1373     // D7. [Loop on j.]  Decrease j by one. Now if j >= 0, go back to D3.
1374   } while (--j >= 0);
1375 
1376   LLVM_DEBUG(dbgs() << "KnuthDiv: quotient:");
1377   LLVM_DEBUG(for (int i = m; i >= 0; i--) dbgs() << " " << q[i]);
1378   LLVM_DEBUG(dbgs() << '\n');
1379 
1380   // D8. [Unnormalize]. Now q[...] is the desired quotient, and the desired
1381   // remainder may be obtained by dividing u[...] by d. If r is non-null we
1382   // compute the remainder (urem uses this).
1383   if (r) {
1384     // The value d is expressed by the "shift" value above since we avoided
1385     // multiplication by d by using a shift left. So, all we have to do is
1386     // shift right here.
1387     if (shift) {
1388       uint32_t carry = 0;
1389       LLVM_DEBUG(dbgs() << "KnuthDiv: remainder:");
1390       for (int i = n-1; i >= 0; i--) {
1391         r[i] = (u[i] >> shift) | carry;
1392         carry = u[i] << (32 - shift);
1393         LLVM_DEBUG(dbgs() << " " << r[i]);
1394       }
1395     } else {
1396       for (int i = n-1; i >= 0; i--) {
1397         r[i] = u[i];
1398         LLVM_DEBUG(dbgs() << " " << r[i]);
1399       }
1400     }
1401     LLVM_DEBUG(dbgs() << '\n');
1402   }
1403   LLVM_DEBUG(dbgs() << '\n');
1404 
1405 #pragma pop_macro("LLVM_DEBUG")
1406 }
1407 
1408 void APInt::divide(const WordType *LHS, unsigned lhsWords, const WordType *RHS,
1409                    unsigned rhsWords, WordType *Quotient, WordType *Remainder) {
1410   assert(lhsWords >= rhsWords && "Fractional result");
1411 
1412   // First, compose the values into an array of 32-bit words instead of
1413   // 64-bit words. This is a necessity of both the "short division" algorithm
1414   // and the Knuth "classical algorithm" which requires there to be native
1415   // operations for +, -, and * on an m bit value with an m*2 bit result. We
1416   // can't use 64-bit operands here because we don't have native results of
1417   // 128-bits. Furthermore, casting the 64-bit values to 32-bit values won't
1418   // work on large-endian machines.
1419   unsigned n = rhsWords * 2;
1420   unsigned m = (lhsWords * 2) - n;
1421 
1422   // Allocate space for the temporary values we need either on the stack, if
1423   // it will fit, or on the heap if it won't.
1424   uint32_t SPACE[128];
1425   uint32_t *U = nullptr;
1426   uint32_t *V = nullptr;
1427   uint32_t *Q = nullptr;
1428   uint32_t *R = nullptr;
1429   if ((Remainder?4:3)*n+2*m+1 <= 128) {
1430     U = &SPACE[0];
1431     V = &SPACE[m+n+1];
1432     Q = &SPACE[(m+n+1) + n];
1433     if (Remainder)
1434       R = &SPACE[(m+n+1) + n + (m+n)];
1435   } else {
1436     U = new uint32_t[m + n + 1];
1437     V = new uint32_t[n];
1438     Q = new uint32_t[m+n];
1439     if (Remainder)
1440       R = new uint32_t[n];
1441   }
1442 
1443   // Initialize the dividend
1444   memset(U, 0, (m+n+1)*sizeof(uint32_t));
1445   for (unsigned i = 0; i < lhsWords; ++i) {
1446     uint64_t tmp = LHS[i];
1447     U[i * 2] = Lo_32(tmp);
1448     U[i * 2 + 1] = Hi_32(tmp);
1449   }
1450   U[m+n] = 0; // this extra word is for "spill" in the Knuth algorithm.
1451 
1452   // Initialize the divisor
1453   memset(V, 0, (n)*sizeof(uint32_t));
1454   for (unsigned i = 0; i < rhsWords; ++i) {
1455     uint64_t tmp = RHS[i];
1456     V[i * 2] = Lo_32(tmp);
1457     V[i * 2 + 1] = Hi_32(tmp);
1458   }
1459 
1460   // initialize the quotient and remainder
1461   memset(Q, 0, (m+n) * sizeof(uint32_t));
1462   if (Remainder)
1463     memset(R, 0, n * sizeof(uint32_t));
1464 
1465   // Now, adjust m and n for the Knuth division. n is the number of words in
1466   // the divisor. m is the number of words by which the dividend exceeds the
1467   // divisor (i.e. m+n is the length of the dividend). These sizes must not
1468   // contain any zero words or the Knuth algorithm fails.
1469   for (unsigned i = n; i > 0 && V[i-1] == 0; i--) {
1470     n--;
1471     m++;
1472   }
1473   for (unsigned i = m+n; i > 0 && U[i-1] == 0; i--)
1474     m--;
1475 
1476   // If we're left with only a single word for the divisor, Knuth doesn't work
1477   // so we implement the short division algorithm here. This is much simpler
1478   // and faster because we are certain that we can divide a 64-bit quantity
1479   // by a 32-bit quantity at hardware speed and short division is simply a
1480   // series of such operations. This is just like doing short division but we
1481   // are using base 2^32 instead of base 10.
1482   assert(n != 0 && "Divide by zero?");
1483   if (n == 1) {
1484     uint32_t divisor = V[0];
1485     uint32_t remainder = 0;
1486     for (int i = m; i >= 0; i--) {
1487       uint64_t partial_dividend = Make_64(remainder, U[i]);
1488       if (partial_dividend == 0) {
1489         Q[i] = 0;
1490         remainder = 0;
1491       } else if (partial_dividend < divisor) {
1492         Q[i] = 0;
1493         remainder = Lo_32(partial_dividend);
1494       } else if (partial_dividend == divisor) {
1495         Q[i] = 1;
1496         remainder = 0;
1497       } else {
1498         Q[i] = Lo_32(partial_dividend / divisor);
1499         remainder = Lo_32(partial_dividend - (Q[i] * divisor));
1500       }
1501     }
1502     if (R)
1503       R[0] = remainder;
1504   } else {
1505     // Now we're ready to invoke the Knuth classical divide algorithm. In this
1506     // case n > 1.
1507     KnuthDiv(U, V, Q, R, m, n);
1508   }
1509 
1510   // If the caller wants the quotient
1511   if (Quotient) {
1512     for (unsigned i = 0; i < lhsWords; ++i)
1513       Quotient[i] = Make_64(Q[i*2+1], Q[i*2]);
1514   }
1515 
1516   // If the caller wants the remainder
1517   if (Remainder) {
1518     for (unsigned i = 0; i < rhsWords; ++i)
1519       Remainder[i] = Make_64(R[i*2+1], R[i*2]);
1520   }
1521 
1522   // Clean up the memory we allocated.
1523   if (U != &SPACE[0]) {
1524     delete [] U;
1525     delete [] V;
1526     delete [] Q;
1527     delete [] R;
1528   }
1529 }
1530 
1531 APInt APInt::udiv(const APInt &RHS) const {
1532   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
1533 
1534   // First, deal with the easy case
1535   if (isSingleWord()) {
1536     assert(RHS.U.VAL != 0 && "Divide by zero?");
1537     return APInt(BitWidth, U.VAL / RHS.U.VAL);
1538   }
1539 
1540   // Get some facts about the LHS and RHS number of bits and words
1541   unsigned lhsWords = getNumWords(getActiveBits());
1542   unsigned rhsBits  = RHS.getActiveBits();
1543   unsigned rhsWords = getNumWords(rhsBits);
1544   assert(rhsWords && "Divided by zero???");
1545 
1546   // Deal with some degenerate cases
1547   if (!lhsWords)
1548     // 0 / X ===> 0
1549     return APInt(BitWidth, 0);
1550   if (rhsBits == 1)
1551     // X / 1 ===> X
1552     return *this;
1553   if (lhsWords < rhsWords || this->ult(RHS))
1554     // X / Y ===> 0, iff X < Y
1555     return APInt(BitWidth, 0);
1556   if (*this == RHS)
1557     // X / X ===> 1
1558     return APInt(BitWidth, 1);
1559   if (lhsWords == 1) // rhsWords is 1 if lhsWords is 1.
1560     // All high words are zero, just use native divide
1561     return APInt(BitWidth, this->U.pVal[0] / RHS.U.pVal[0]);
1562 
1563   // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1564   APInt Quotient(BitWidth, 0); // to hold result.
1565   divide(U.pVal, lhsWords, RHS.U.pVal, rhsWords, Quotient.U.pVal, nullptr);
1566   return Quotient;
1567 }
1568 
1569 APInt APInt::udiv(uint64_t RHS) const {
1570   assert(RHS != 0 && "Divide by zero?");
1571 
1572   // First, deal with the easy case
1573   if (isSingleWord())
1574     return APInt(BitWidth, U.VAL / RHS);
1575 
1576   // Get some facts about the LHS words.
1577   unsigned lhsWords = getNumWords(getActiveBits());
1578 
1579   // Deal with some degenerate cases
1580   if (!lhsWords)
1581     // 0 / X ===> 0
1582     return APInt(BitWidth, 0);
1583   if (RHS == 1)
1584     // X / 1 ===> X
1585     return *this;
1586   if (this->ult(RHS))
1587     // X / Y ===> 0, iff X < Y
1588     return APInt(BitWidth, 0);
1589   if (*this == RHS)
1590     // X / X ===> 1
1591     return APInt(BitWidth, 1);
1592   if (lhsWords == 1) // rhsWords is 1 if lhsWords is 1.
1593     // All high words are zero, just use native divide
1594     return APInt(BitWidth, this->U.pVal[0] / RHS);
1595 
1596   // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1597   APInt Quotient(BitWidth, 0); // to hold result.
1598   divide(U.pVal, lhsWords, &RHS, 1, Quotient.U.pVal, nullptr);
1599   return Quotient;
1600 }
1601 
1602 APInt APInt::sdiv(const APInt &RHS) const {
1603   if (isNegative()) {
1604     if (RHS.isNegative())
1605       return (-(*this)).udiv(-RHS);
1606     return -((-(*this)).udiv(RHS));
1607   }
1608   if (RHS.isNegative())
1609     return -(this->udiv(-RHS));
1610   return this->udiv(RHS);
1611 }
1612 
1613 APInt APInt::sdiv(int64_t RHS) const {
1614   if (isNegative()) {
1615     if (RHS < 0)
1616       return (-(*this)).udiv(-RHS);
1617     return -((-(*this)).udiv(RHS));
1618   }
1619   if (RHS < 0)
1620     return -(this->udiv(-RHS));
1621   return this->udiv(RHS);
1622 }
1623 
1624 APInt APInt::urem(const APInt &RHS) const {
1625   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
1626   if (isSingleWord()) {
1627     assert(RHS.U.VAL != 0 && "Remainder by zero?");
1628     return APInt(BitWidth, U.VAL % RHS.U.VAL);
1629   }
1630 
1631   // Get some facts about the LHS
1632   unsigned lhsWords = getNumWords(getActiveBits());
1633 
1634   // Get some facts about the RHS
1635   unsigned rhsBits = RHS.getActiveBits();
1636   unsigned rhsWords = getNumWords(rhsBits);
1637   assert(rhsWords && "Performing remainder operation by zero ???");
1638 
1639   // Check the degenerate cases
1640   if (lhsWords == 0)
1641     // 0 % Y ===> 0
1642     return APInt(BitWidth, 0);
1643   if (rhsBits == 1)
1644     // X % 1 ===> 0
1645     return APInt(BitWidth, 0);
1646   if (lhsWords < rhsWords || this->ult(RHS))
1647     // X % Y ===> X, iff X < Y
1648     return *this;
1649   if (*this == RHS)
1650     // X % X == 0;
1651     return APInt(BitWidth, 0);
1652   if (lhsWords == 1)
1653     // All high words are zero, just use native remainder
1654     return APInt(BitWidth, U.pVal[0] % RHS.U.pVal[0]);
1655 
1656   // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1657   APInt Remainder(BitWidth, 0);
1658   divide(U.pVal, lhsWords, RHS.U.pVal, rhsWords, nullptr, Remainder.U.pVal);
1659   return Remainder;
1660 }
1661 
1662 uint64_t APInt::urem(uint64_t RHS) const {
1663   assert(RHS != 0 && "Remainder by zero?");
1664 
1665   if (isSingleWord())
1666     return U.VAL % RHS;
1667 
1668   // Get some facts about the LHS
1669   unsigned lhsWords = getNumWords(getActiveBits());
1670 
1671   // Check the degenerate cases
1672   if (lhsWords == 0)
1673     // 0 % Y ===> 0
1674     return 0;
1675   if (RHS == 1)
1676     // X % 1 ===> 0
1677     return 0;
1678   if (this->ult(RHS))
1679     // X % Y ===> X, iff X < Y
1680     return getZExtValue();
1681   if (*this == RHS)
1682     // X % X == 0;
1683     return 0;
1684   if (lhsWords == 1)
1685     // All high words are zero, just use native remainder
1686     return U.pVal[0] % RHS;
1687 
1688   // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1689   uint64_t Remainder;
1690   divide(U.pVal, lhsWords, &RHS, 1, nullptr, &Remainder);
1691   return Remainder;
1692 }
1693 
1694 APInt APInt::srem(const APInt &RHS) const {
1695   if (isNegative()) {
1696     if (RHS.isNegative())
1697       return -((-(*this)).urem(-RHS));
1698     return -((-(*this)).urem(RHS));
1699   }
1700   if (RHS.isNegative())
1701     return this->urem(-RHS);
1702   return this->urem(RHS);
1703 }
1704 
1705 int64_t APInt::srem(int64_t RHS) const {
1706   if (isNegative()) {
1707     if (RHS < 0)
1708       return -((-(*this)).urem(-RHS));
1709     return -((-(*this)).urem(RHS));
1710   }
1711   if (RHS < 0)
1712     return this->urem(-RHS);
1713   return this->urem(RHS);
1714 }
1715 
1716 void APInt::udivrem(const APInt &LHS, const APInt &RHS,
1717                     APInt &Quotient, APInt &Remainder) {
1718   assert(LHS.BitWidth == RHS.BitWidth && "Bit widths must be the same");
1719   unsigned BitWidth = LHS.BitWidth;
1720 
1721   // First, deal with the easy case
1722   if (LHS.isSingleWord()) {
1723     assert(RHS.U.VAL != 0 && "Divide by zero?");
1724     uint64_t QuotVal = LHS.U.VAL / RHS.U.VAL;
1725     uint64_t RemVal = LHS.U.VAL % RHS.U.VAL;
1726     Quotient = APInt(BitWidth, QuotVal);
1727     Remainder = APInt(BitWidth, RemVal);
1728     return;
1729   }
1730 
1731   // Get some size facts about the dividend and divisor
1732   unsigned lhsWords = getNumWords(LHS.getActiveBits());
1733   unsigned rhsBits  = RHS.getActiveBits();
1734   unsigned rhsWords = getNumWords(rhsBits);
1735   assert(rhsWords && "Performing divrem operation by zero ???");
1736 
1737   // Check the degenerate cases
1738   if (lhsWords == 0) {
1739     Quotient = APInt(BitWidth, 0);    // 0 / Y ===> 0
1740     Remainder = APInt(BitWidth, 0);   // 0 % Y ===> 0
1741     return;
1742   }
1743 
1744   if (rhsBits == 1) {
1745     Quotient = LHS;                   // X / 1 ===> X
1746     Remainder = APInt(BitWidth, 0);   // X % 1 ===> 0
1747   }
1748 
1749   if (lhsWords < rhsWords || LHS.ult(RHS)) {
1750     Remainder = LHS;                  // X % Y ===> X, iff X < Y
1751     Quotient = APInt(BitWidth, 0);    // X / Y ===> 0, iff X < Y
1752     return;
1753   }
1754 
1755   if (LHS == RHS) {
1756     Quotient  = APInt(BitWidth, 1);   // X / X ===> 1
1757     Remainder = APInt(BitWidth, 0);   // X % X ===> 0;
1758     return;
1759   }
1760 
1761   // Make sure there is enough space to hold the results.
1762   // NOTE: This assumes that reallocate won't affect any bits if it doesn't
1763   // change the size. This is necessary if Quotient or Remainder is aliased
1764   // with LHS or RHS.
1765   Quotient.reallocate(BitWidth);
1766   Remainder.reallocate(BitWidth);
1767 
1768   if (lhsWords == 1) { // rhsWords is 1 if lhsWords is 1.
1769     // There is only one word to consider so use the native versions.
1770     uint64_t lhsValue = LHS.U.pVal[0];
1771     uint64_t rhsValue = RHS.U.pVal[0];
1772     Quotient = lhsValue / rhsValue;
1773     Remainder = lhsValue % rhsValue;
1774     return;
1775   }
1776 
1777   // Okay, lets do it the long way
1778   divide(LHS.U.pVal, lhsWords, RHS.U.pVal, rhsWords, Quotient.U.pVal,
1779          Remainder.U.pVal);
1780   // Clear the rest of the Quotient and Remainder.
1781   std::memset(Quotient.U.pVal + lhsWords, 0,
1782               (getNumWords(BitWidth) - lhsWords) * APINT_WORD_SIZE);
1783   std::memset(Remainder.U.pVal + rhsWords, 0,
1784               (getNumWords(BitWidth) - rhsWords) * APINT_WORD_SIZE);
1785 }
1786 
1787 void APInt::udivrem(const APInt &LHS, uint64_t RHS, APInt &Quotient,
1788                     uint64_t &Remainder) {
1789   assert(RHS != 0 && "Divide by zero?");
1790   unsigned BitWidth = LHS.BitWidth;
1791 
1792   // First, deal with the easy case
1793   if (LHS.isSingleWord()) {
1794     uint64_t QuotVal = LHS.U.VAL / RHS;
1795     Remainder = LHS.U.VAL % RHS;
1796     Quotient = APInt(BitWidth, QuotVal);
1797     return;
1798   }
1799 
1800   // Get some size facts about the dividend and divisor
1801   unsigned lhsWords = getNumWords(LHS.getActiveBits());
1802 
1803   // Check the degenerate cases
1804   if (lhsWords == 0) {
1805     Quotient = APInt(BitWidth, 0);    // 0 / Y ===> 0
1806     Remainder = 0;                    // 0 % Y ===> 0
1807     return;
1808   }
1809 
1810   if (RHS == 1) {
1811     Quotient = LHS;                   // X / 1 ===> X
1812     Remainder = 0;                    // X % 1 ===> 0
1813     return;
1814   }
1815 
1816   if (LHS.ult(RHS)) {
1817     Remainder = LHS.getZExtValue();   // X % Y ===> X, iff X < Y
1818     Quotient = APInt(BitWidth, 0);    // X / Y ===> 0, iff X < Y
1819     return;
1820   }
1821 
1822   if (LHS == RHS) {
1823     Quotient  = APInt(BitWidth, 1);   // X / X ===> 1
1824     Remainder = 0;                    // X % X ===> 0;
1825     return;
1826   }
1827 
1828   // Make sure there is enough space to hold the results.
1829   // NOTE: This assumes that reallocate won't affect any bits if it doesn't
1830   // change the size. This is necessary if Quotient is aliased with LHS.
1831   Quotient.reallocate(BitWidth);
1832 
1833   if (lhsWords == 1) { // rhsWords is 1 if lhsWords is 1.
1834     // There is only one word to consider so use the native versions.
1835     uint64_t lhsValue = LHS.U.pVal[0];
1836     Quotient = lhsValue / RHS;
1837     Remainder = lhsValue % RHS;
1838     return;
1839   }
1840 
1841   // Okay, lets do it the long way
1842   divide(LHS.U.pVal, lhsWords, &RHS, 1, Quotient.U.pVal, &Remainder);
1843   // Clear the rest of the Quotient.
1844   std::memset(Quotient.U.pVal + lhsWords, 0,
1845               (getNumWords(BitWidth) - lhsWords) * APINT_WORD_SIZE);
1846 }
1847 
1848 void APInt::sdivrem(const APInt &LHS, const APInt &RHS,
1849                     APInt &Quotient, APInt &Remainder) {
1850   if (LHS.isNegative()) {
1851     if (RHS.isNegative())
1852       APInt::udivrem(-LHS, -RHS, Quotient, Remainder);
1853     else {
1854       APInt::udivrem(-LHS, RHS, Quotient, Remainder);
1855       Quotient.negate();
1856     }
1857     Remainder.negate();
1858   } else if (RHS.isNegative()) {
1859     APInt::udivrem(LHS, -RHS, Quotient, Remainder);
1860     Quotient.negate();
1861   } else {
1862     APInt::udivrem(LHS, RHS, Quotient, Remainder);
1863   }
1864 }
1865 
1866 void APInt::sdivrem(const APInt &LHS, int64_t RHS,
1867                     APInt &Quotient, int64_t &Remainder) {
1868   uint64_t R = Remainder;
1869   if (LHS.isNegative()) {
1870     if (RHS < 0)
1871       APInt::udivrem(-LHS, -RHS, Quotient, R);
1872     else {
1873       APInt::udivrem(-LHS, RHS, Quotient, R);
1874       Quotient.negate();
1875     }
1876     R = -R;
1877   } else if (RHS < 0) {
1878     APInt::udivrem(LHS, -RHS, Quotient, R);
1879     Quotient.negate();
1880   } else {
1881     APInt::udivrem(LHS, RHS, Quotient, R);
1882   }
1883   Remainder = R;
1884 }
1885 
1886 APInt APInt::sadd_ov(const APInt &RHS, bool &Overflow) const {
1887   APInt Res = *this+RHS;
1888   Overflow = isNonNegative() == RHS.isNonNegative() &&
1889              Res.isNonNegative() != isNonNegative();
1890   return Res;
1891 }
1892 
1893 APInt APInt::uadd_ov(const APInt &RHS, bool &Overflow) const {
1894   APInt Res = *this+RHS;
1895   Overflow = Res.ult(RHS);
1896   return Res;
1897 }
1898 
1899 APInt APInt::ssub_ov(const APInt &RHS, bool &Overflow) const {
1900   APInt Res = *this - RHS;
1901   Overflow = isNonNegative() != RHS.isNonNegative() &&
1902              Res.isNonNegative() != isNonNegative();
1903   return Res;
1904 }
1905 
1906 APInt APInt::usub_ov(const APInt &RHS, bool &Overflow) const {
1907   APInt Res = *this-RHS;
1908   Overflow = Res.ugt(*this);
1909   return Res;
1910 }
1911 
1912 APInt APInt::sdiv_ov(const APInt &RHS, bool &Overflow) const {
1913   // MININT/-1  -->  overflow.
1914   Overflow = isMinSignedValue() && RHS.isAllOnesValue();
1915   return sdiv(RHS);
1916 }
1917 
1918 APInt APInt::smul_ov(const APInt &RHS, bool &Overflow) const {
1919   APInt Res = *this * RHS;
1920 
1921   if (*this != 0 && RHS != 0)
1922     Overflow = Res.sdiv(RHS) != *this || Res.sdiv(*this) != RHS;
1923   else
1924     Overflow = false;
1925   return Res;
1926 }
1927 
1928 APInt APInt::umul_ov(const APInt &RHS, bool &Overflow) const {
1929   APInt Res = *this * RHS;
1930 
1931   if (*this != 0 && RHS != 0)
1932     Overflow = Res.udiv(RHS) != *this || Res.udiv(*this) != RHS;
1933   else
1934     Overflow = false;
1935   return Res;
1936 }
1937 
1938 APInt APInt::sshl_ov(const APInt &ShAmt, bool &Overflow) const {
1939   Overflow = ShAmt.uge(getBitWidth());
1940   if (Overflow)
1941     return APInt(BitWidth, 0);
1942 
1943   if (isNonNegative()) // Don't allow sign change.
1944     Overflow = ShAmt.uge(countLeadingZeros());
1945   else
1946     Overflow = ShAmt.uge(countLeadingOnes());
1947 
1948   return *this << ShAmt;
1949 }
1950 
1951 APInt APInt::ushl_ov(const APInt &ShAmt, bool &Overflow) const {
1952   Overflow = ShAmt.uge(getBitWidth());
1953   if (Overflow)
1954     return APInt(BitWidth, 0);
1955 
1956   Overflow = ShAmt.ugt(countLeadingZeros());
1957 
1958   return *this << ShAmt;
1959 }
1960 
1961 
1962 
1963 
1964 void APInt::fromString(unsigned numbits, StringRef str, uint8_t radix) {
1965   // Check our assumptions here
1966   assert(!str.empty() && "Invalid string length");
1967   assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 ||
1968           radix == 36) &&
1969          "Radix should be 2, 8, 10, 16, or 36!");
1970 
1971   StringRef::iterator p = str.begin();
1972   size_t slen = str.size();
1973   bool isNeg = *p == '-';
1974   if (*p == '-' || *p == '+') {
1975     p++;
1976     slen--;
1977     assert(slen && "String is only a sign, needs a value.");
1978   }
1979   assert((slen <= numbits || radix != 2) && "Insufficient bit width");
1980   assert(((slen-1)*3 <= numbits || radix != 8) && "Insufficient bit width");
1981   assert(((slen-1)*4 <= numbits || radix != 16) && "Insufficient bit width");
1982   assert((((slen-1)*64)/22 <= numbits || radix != 10) &&
1983          "Insufficient bit width");
1984 
1985   // Allocate memory if needed
1986   if (isSingleWord())
1987     U.VAL = 0;
1988   else
1989     U.pVal = getClearedMemory(getNumWords());
1990 
1991   // Figure out if we can shift instead of multiply
1992   unsigned shift = (radix == 16 ? 4 : radix == 8 ? 3 : radix == 2 ? 1 : 0);
1993 
1994   // Enter digit traversal loop
1995   for (StringRef::iterator e = str.end(); p != e; ++p) {
1996     unsigned digit = getDigit(*p, radix);
1997     assert(digit < radix && "Invalid character in digit string");
1998 
1999     // Shift or multiply the value by the radix
2000     if (slen > 1) {
2001       if (shift)
2002         *this <<= shift;
2003       else
2004         *this *= radix;
2005     }
2006 
2007     // Add in the digit we just interpreted
2008     *this += digit;
2009   }
2010   // If its negative, put it in two's complement form
2011   if (isNeg)
2012     this->negate();
2013 }
2014 
2015 void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix,
2016                      bool Signed, bool formatAsCLiteral) const {
2017   assert((Radix == 10 || Radix == 8 || Radix == 16 || Radix == 2 ||
2018           Radix == 36) &&
2019          "Radix should be 2, 8, 10, 16, or 36!");
2020 
2021   const char *Prefix = "";
2022   if (formatAsCLiteral) {
2023     switch (Radix) {
2024       case 2:
2025         // Binary literals are a non-standard extension added in gcc 4.3:
2026         // http://gcc.gnu.org/onlinedocs/gcc-4.3.0/gcc/Binary-constants.html
2027         Prefix = "0b";
2028         break;
2029       case 8:
2030         Prefix = "0";
2031         break;
2032       case 10:
2033         break; // No prefix
2034       case 16:
2035         Prefix = "0x";
2036         break;
2037       default:
2038         llvm_unreachable("Invalid radix!");
2039     }
2040   }
2041 
2042   // First, check for a zero value and just short circuit the logic below.
2043   if (*this == 0) {
2044     while (*Prefix) {
2045       Str.push_back(*Prefix);
2046       ++Prefix;
2047     };
2048     Str.push_back('0');
2049     return;
2050   }
2051 
2052   static const char Digits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
2053 
2054   if (isSingleWord()) {
2055     char Buffer[65];
2056     char *BufPtr = std::end(Buffer);
2057 
2058     uint64_t N;
2059     if (!Signed) {
2060       N = getZExtValue();
2061     } else {
2062       int64_t I = getSExtValue();
2063       if (I >= 0) {
2064         N = I;
2065       } else {
2066         Str.push_back('-');
2067         N = -(uint64_t)I;
2068       }
2069     }
2070 
2071     while (*Prefix) {
2072       Str.push_back(*Prefix);
2073       ++Prefix;
2074     };
2075 
2076     while (N) {
2077       *--BufPtr = Digits[N % Radix];
2078       N /= Radix;
2079     }
2080     Str.append(BufPtr, std::end(Buffer));
2081     return;
2082   }
2083 
2084   APInt Tmp(*this);
2085 
2086   if (Signed && isNegative()) {
2087     // They want to print the signed version and it is a negative value
2088     // Flip the bits and add one to turn it into the equivalent positive
2089     // value and put a '-' in the result.
2090     Tmp.negate();
2091     Str.push_back('-');
2092   }
2093 
2094   while (*Prefix) {
2095     Str.push_back(*Prefix);
2096     ++Prefix;
2097   };
2098 
2099   // We insert the digits backward, then reverse them to get the right order.
2100   unsigned StartDig = Str.size();
2101 
2102   // For the 2, 8 and 16 bit cases, we can just shift instead of divide
2103   // because the number of bits per digit (1, 3 and 4 respectively) divides
2104   // equally.  We just shift until the value is zero.
2105   if (Radix == 2 || Radix == 8 || Radix == 16) {
2106     // Just shift tmp right for each digit width until it becomes zero
2107     unsigned ShiftAmt = (Radix == 16 ? 4 : (Radix == 8 ? 3 : 1));
2108     unsigned MaskAmt = Radix - 1;
2109 
2110     while (Tmp.getBoolValue()) {
2111       unsigned Digit = unsigned(Tmp.getRawData()[0]) & MaskAmt;
2112       Str.push_back(Digits[Digit]);
2113       Tmp.lshrInPlace(ShiftAmt);
2114     }
2115   } else {
2116     while (Tmp.getBoolValue()) {
2117       uint64_t Digit;
2118       udivrem(Tmp, Radix, Tmp, Digit);
2119       assert(Digit < Radix && "divide failed");
2120       Str.push_back(Digits[Digit]);
2121     }
2122   }
2123 
2124   // Reverse the digits before returning.
2125   std::reverse(Str.begin()+StartDig, Str.end());
2126 }
2127 
2128 /// Returns the APInt as a std::string. Note that this is an inefficient method.
2129 /// It is better to pass in a SmallVector/SmallString to the methods above.
2130 std::string APInt::toString(unsigned Radix = 10, bool Signed = true) const {
2131   SmallString<40> S;
2132   toString(S, Radix, Signed, /* formatAsCLiteral = */false);
2133   return S.str();
2134 }
2135 
2136 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2137 LLVM_DUMP_METHOD void APInt::dump() const {
2138   SmallString<40> S, U;
2139   this->toStringUnsigned(U);
2140   this->toStringSigned(S);
2141   dbgs() << "APInt(" << BitWidth << "b, "
2142          << U << "u " << S << "s)\n";
2143 }
2144 #endif
2145 
2146 void APInt::print(raw_ostream &OS, bool isSigned) const {
2147   SmallString<40> S;
2148   this->toString(S, 10, isSigned, /* formatAsCLiteral = */false);
2149   OS << S;
2150 }
2151 
2152 // This implements a variety of operations on a representation of
2153 // arbitrary precision, two's-complement, bignum integer values.
2154 
2155 // Assumed by lowHalf, highHalf, partMSB and partLSB.  A fairly safe
2156 // and unrestricting assumption.
2157 static_assert(APInt::APINT_BITS_PER_WORD % 2 == 0,
2158               "Part width must be divisible by 2!");
2159 
2160 /* Some handy functions local to this file.  */
2161 
2162 /* Returns the integer part with the least significant BITS set.
2163    BITS cannot be zero.  */
2164 static inline APInt::WordType lowBitMask(unsigned bits) {
2165   assert(bits != 0 && bits <= APInt::APINT_BITS_PER_WORD);
2166 
2167   return ~(APInt::WordType) 0 >> (APInt::APINT_BITS_PER_WORD - bits);
2168 }
2169 
2170 /* Returns the value of the lower half of PART.  */
2171 static inline APInt::WordType lowHalf(APInt::WordType part) {
2172   return part & lowBitMask(APInt::APINT_BITS_PER_WORD / 2);
2173 }
2174 
2175 /* Returns the value of the upper half of PART.  */
2176 static inline APInt::WordType highHalf(APInt::WordType part) {
2177   return part >> (APInt::APINT_BITS_PER_WORD / 2);
2178 }
2179 
2180 /* Returns the bit number of the most significant set bit of a part.
2181    If the input number has no bits set -1U is returned.  */
2182 static unsigned partMSB(APInt::WordType value) {
2183   return findLastSet(value, ZB_Max);
2184 }
2185 
2186 /* Returns the bit number of the least significant set bit of a
2187    part.  If the input number has no bits set -1U is returned.  */
2188 static unsigned partLSB(APInt::WordType value) {
2189   return findFirstSet(value, ZB_Max);
2190 }
2191 
2192 /* Sets the least significant part of a bignum to the input value, and
2193    zeroes out higher parts.  */
2194 void APInt::tcSet(WordType *dst, WordType part, unsigned parts) {
2195   assert(parts > 0);
2196 
2197   dst[0] = part;
2198   for (unsigned i = 1; i < parts; i++)
2199     dst[i] = 0;
2200 }
2201 
2202 /* Assign one bignum to another.  */
2203 void APInt::tcAssign(WordType *dst, const WordType *src, unsigned parts) {
2204   for (unsigned i = 0; i < parts; i++)
2205     dst[i] = src[i];
2206 }
2207 
2208 /* Returns true if a bignum is zero, false otherwise.  */
2209 bool APInt::tcIsZero(const WordType *src, unsigned parts) {
2210   for (unsigned i = 0; i < parts; i++)
2211     if (src[i])
2212       return false;
2213 
2214   return true;
2215 }
2216 
2217 /* Extract the given bit of a bignum; returns 0 or 1.  */
2218 int APInt::tcExtractBit(const WordType *parts, unsigned bit) {
2219   return (parts[whichWord(bit)] & maskBit(bit)) != 0;
2220 }
2221 
2222 /* Set the given bit of a bignum. */
2223 void APInt::tcSetBit(WordType *parts, unsigned bit) {
2224   parts[whichWord(bit)] |= maskBit(bit);
2225 }
2226 
2227 /* Clears the given bit of a bignum. */
2228 void APInt::tcClearBit(WordType *parts, unsigned bit) {
2229   parts[whichWord(bit)] &= ~maskBit(bit);
2230 }
2231 
2232 /* Returns the bit number of the least significant set bit of a
2233    number.  If the input number has no bits set -1U is returned.  */
2234 unsigned APInt::tcLSB(const WordType *parts, unsigned n) {
2235   for (unsigned i = 0; i < n; i++) {
2236     if (parts[i] != 0) {
2237       unsigned lsb = partLSB(parts[i]);
2238 
2239       return lsb + i * APINT_BITS_PER_WORD;
2240     }
2241   }
2242 
2243   return -1U;
2244 }
2245 
2246 /* Returns the bit number of the most significant set bit of a number.
2247    If the input number has no bits set -1U is returned.  */
2248 unsigned APInt::tcMSB(const WordType *parts, unsigned n) {
2249   do {
2250     --n;
2251 
2252     if (parts[n] != 0) {
2253       unsigned msb = partMSB(parts[n]);
2254 
2255       return msb + n * APINT_BITS_PER_WORD;
2256     }
2257   } while (n);
2258 
2259   return -1U;
2260 }
2261 
2262 /* Copy the bit vector of width srcBITS from SRC, starting at bit
2263    srcLSB, to DST, of dstCOUNT parts, such that the bit srcLSB becomes
2264    the least significant bit of DST.  All high bits above srcBITS in
2265    DST are zero-filled.  */
2266 void
2267 APInt::tcExtract(WordType *dst, unsigned dstCount, const WordType *src,
2268                  unsigned srcBits, unsigned srcLSB) {
2269   unsigned dstParts = (srcBits + APINT_BITS_PER_WORD - 1) / APINT_BITS_PER_WORD;
2270   assert(dstParts <= dstCount);
2271 
2272   unsigned firstSrcPart = srcLSB / APINT_BITS_PER_WORD;
2273   tcAssign (dst, src + firstSrcPart, dstParts);
2274 
2275   unsigned shift = srcLSB % APINT_BITS_PER_WORD;
2276   tcShiftRight (dst, dstParts, shift);
2277 
2278   /* We now have (dstParts * APINT_BITS_PER_WORD - shift) bits from SRC
2279      in DST.  If this is less that srcBits, append the rest, else
2280      clear the high bits.  */
2281   unsigned n = dstParts * APINT_BITS_PER_WORD - shift;
2282   if (n < srcBits) {
2283     WordType mask = lowBitMask (srcBits - n);
2284     dst[dstParts - 1] |= ((src[firstSrcPart + dstParts] & mask)
2285                           << n % APINT_BITS_PER_WORD);
2286   } else if (n > srcBits) {
2287     if (srcBits % APINT_BITS_PER_WORD)
2288       dst[dstParts - 1] &= lowBitMask (srcBits % APINT_BITS_PER_WORD);
2289   }
2290 
2291   /* Clear high parts.  */
2292   while (dstParts < dstCount)
2293     dst[dstParts++] = 0;
2294 }
2295 
2296 /* DST += RHS + C where C is zero or one.  Returns the carry flag.  */
2297 APInt::WordType APInt::tcAdd(WordType *dst, const WordType *rhs,
2298                              WordType c, unsigned parts) {
2299   assert(c <= 1);
2300 
2301   for (unsigned i = 0; i < parts; i++) {
2302     WordType l = dst[i];
2303     if (c) {
2304       dst[i] += rhs[i] + 1;
2305       c = (dst[i] <= l);
2306     } else {
2307       dst[i] += rhs[i];
2308       c = (dst[i] < l);
2309     }
2310   }
2311 
2312   return c;
2313 }
2314 
2315 /// This function adds a single "word" integer, src, to the multiple
2316 /// "word" integer array, dst[]. dst[] is modified to reflect the addition and
2317 /// 1 is returned if there is a carry out, otherwise 0 is returned.
2318 /// @returns the carry of the addition.
2319 APInt::WordType APInt::tcAddPart(WordType *dst, WordType src,
2320                                  unsigned parts) {
2321   for (unsigned i = 0; i < parts; ++i) {
2322     dst[i] += src;
2323     if (dst[i] >= src)
2324       return 0; // No need to carry so exit early.
2325     src = 1; // Carry one to next digit.
2326   }
2327 
2328   return 1;
2329 }
2330 
2331 /* DST -= RHS + C where C is zero or one.  Returns the carry flag.  */
2332 APInt::WordType APInt::tcSubtract(WordType *dst, const WordType *rhs,
2333                                   WordType c, unsigned parts) {
2334   assert(c <= 1);
2335 
2336   for (unsigned i = 0; i < parts; i++) {
2337     WordType l = dst[i];
2338     if (c) {
2339       dst[i] -= rhs[i] + 1;
2340       c = (dst[i] >= l);
2341     } else {
2342       dst[i] -= rhs[i];
2343       c = (dst[i] > l);
2344     }
2345   }
2346 
2347   return c;
2348 }
2349 
2350 /// This function subtracts a single "word" (64-bit word), src, from
2351 /// the multi-word integer array, dst[], propagating the borrowed 1 value until
2352 /// no further borrowing is needed or it runs out of "words" in dst.  The result
2353 /// is 1 if "borrowing" exhausted the digits in dst, or 0 if dst was not
2354 /// exhausted. In other words, if src > dst then this function returns 1,
2355 /// otherwise 0.
2356 /// @returns the borrow out of the subtraction
2357 APInt::WordType APInt::tcSubtractPart(WordType *dst, WordType src,
2358                                       unsigned parts) {
2359   for (unsigned i = 0; i < parts; ++i) {
2360     WordType Dst = dst[i];
2361     dst[i] -= src;
2362     if (src <= Dst)
2363       return 0; // No need to borrow so exit early.
2364     src = 1; // We have to "borrow 1" from next "word"
2365   }
2366 
2367   return 1;
2368 }
2369 
2370 /* Negate a bignum in-place.  */
2371 void APInt::tcNegate(WordType *dst, unsigned parts) {
2372   tcComplement(dst, parts);
2373   tcIncrement(dst, parts);
2374 }
2375 
2376 /*  DST += SRC * MULTIPLIER + CARRY   if add is true
2377     DST  = SRC * MULTIPLIER + CARRY   if add is false
2378 
2379     Requires 0 <= DSTPARTS <= SRCPARTS + 1.  If DST overlaps SRC
2380     they must start at the same point, i.e. DST == SRC.
2381 
2382     If DSTPARTS == SRCPARTS + 1 no overflow occurs and zero is
2383     returned.  Otherwise DST is filled with the least significant
2384     DSTPARTS parts of the result, and if all of the omitted higher
2385     parts were zero return zero, otherwise overflow occurred and
2386     return one.  */
2387 int APInt::tcMultiplyPart(WordType *dst, const WordType *src,
2388                           WordType multiplier, WordType carry,
2389                           unsigned srcParts, unsigned dstParts,
2390                           bool add) {
2391   /* Otherwise our writes of DST kill our later reads of SRC.  */
2392   assert(dst <= src || dst >= src + srcParts);
2393   assert(dstParts <= srcParts + 1);
2394 
2395   /* N loops; minimum of dstParts and srcParts.  */
2396   unsigned n = std::min(dstParts, srcParts);
2397 
2398   for (unsigned i = 0; i < n; i++) {
2399     WordType low, mid, high, srcPart;
2400 
2401       /* [ LOW, HIGH ] = MULTIPLIER * SRC[i] + DST[i] + CARRY.
2402 
2403          This cannot overflow, because
2404 
2405          (n - 1) * (n - 1) + 2 (n - 1) = (n - 1) * (n + 1)
2406 
2407          which is less than n^2.  */
2408 
2409     srcPart = src[i];
2410 
2411     if (multiplier == 0 || srcPart == 0) {
2412       low = carry;
2413       high = 0;
2414     } else {
2415       low = lowHalf(srcPart) * lowHalf(multiplier);
2416       high = highHalf(srcPart) * highHalf(multiplier);
2417 
2418       mid = lowHalf(srcPart) * highHalf(multiplier);
2419       high += highHalf(mid);
2420       mid <<= APINT_BITS_PER_WORD / 2;
2421       if (low + mid < low)
2422         high++;
2423       low += mid;
2424 
2425       mid = highHalf(srcPart) * lowHalf(multiplier);
2426       high += highHalf(mid);
2427       mid <<= APINT_BITS_PER_WORD / 2;
2428       if (low + mid < low)
2429         high++;
2430       low += mid;
2431 
2432       /* Now add carry.  */
2433       if (low + carry < low)
2434         high++;
2435       low += carry;
2436     }
2437 
2438     if (add) {
2439       /* And now DST[i], and store the new low part there.  */
2440       if (low + dst[i] < low)
2441         high++;
2442       dst[i] += low;
2443     } else
2444       dst[i] = low;
2445 
2446     carry = high;
2447   }
2448 
2449   if (srcParts < dstParts) {
2450     /* Full multiplication, there is no overflow.  */
2451     assert(srcParts + 1 == dstParts);
2452     dst[srcParts] = carry;
2453     return 0;
2454   }
2455 
2456   /* We overflowed if there is carry.  */
2457   if (carry)
2458     return 1;
2459 
2460   /* We would overflow if any significant unwritten parts would be
2461      non-zero.  This is true if any remaining src parts are non-zero
2462      and the multiplier is non-zero.  */
2463   if (multiplier)
2464     for (unsigned i = dstParts; i < srcParts; i++)
2465       if (src[i])
2466         return 1;
2467 
2468   /* We fitted in the narrow destination.  */
2469   return 0;
2470 }
2471 
2472 /* DST = LHS * RHS, where DST has the same width as the operands and
2473    is filled with the least significant parts of the result.  Returns
2474    one if overflow occurred, otherwise zero.  DST must be disjoint
2475    from both operands.  */
2476 int APInt::tcMultiply(WordType *dst, const WordType *lhs,
2477                       const WordType *rhs, unsigned parts) {
2478   assert(dst != lhs && dst != rhs);
2479 
2480   int overflow = 0;
2481   tcSet(dst, 0, parts);
2482 
2483   for (unsigned i = 0; i < parts; i++)
2484     overflow |= tcMultiplyPart(&dst[i], lhs, rhs[i], 0, parts,
2485                                parts - i, true);
2486 
2487   return overflow;
2488 }
2489 
2490 /// DST = LHS * RHS, where DST has width the sum of the widths of the
2491 /// operands. No overflow occurs. DST must be disjoint from both operands.
2492 void APInt::tcFullMultiply(WordType *dst, const WordType *lhs,
2493                            const WordType *rhs, unsigned lhsParts,
2494                            unsigned rhsParts) {
2495   /* Put the narrower number on the LHS for less loops below.  */
2496   if (lhsParts > rhsParts)
2497     return tcFullMultiply (dst, rhs, lhs, rhsParts, lhsParts);
2498 
2499   assert(dst != lhs && dst != rhs);
2500 
2501   tcSet(dst, 0, rhsParts);
2502 
2503   for (unsigned i = 0; i < lhsParts; i++)
2504     tcMultiplyPart(&dst[i], rhs, lhs[i], 0, rhsParts, rhsParts + 1, true);
2505 }
2506 
2507 /* If RHS is zero LHS and REMAINDER are left unchanged, return one.
2508    Otherwise set LHS to LHS / RHS with the fractional part discarded,
2509    set REMAINDER to the remainder, return zero.  i.e.
2510 
2511    OLD_LHS = RHS * LHS + REMAINDER
2512 
2513    SCRATCH is a bignum of the same size as the operands and result for
2514    use by the routine; its contents need not be initialized and are
2515    destroyed.  LHS, REMAINDER and SCRATCH must be distinct.
2516 */
2517 int APInt::tcDivide(WordType *lhs, const WordType *rhs,
2518                     WordType *remainder, WordType *srhs,
2519                     unsigned parts) {
2520   assert(lhs != remainder && lhs != srhs && remainder != srhs);
2521 
2522   unsigned shiftCount = tcMSB(rhs, parts) + 1;
2523   if (shiftCount == 0)
2524     return true;
2525 
2526   shiftCount = parts * APINT_BITS_PER_WORD - shiftCount;
2527   unsigned n = shiftCount / APINT_BITS_PER_WORD;
2528   WordType mask = (WordType) 1 << (shiftCount % APINT_BITS_PER_WORD);
2529 
2530   tcAssign(srhs, rhs, parts);
2531   tcShiftLeft(srhs, parts, shiftCount);
2532   tcAssign(remainder, lhs, parts);
2533   tcSet(lhs, 0, parts);
2534 
2535   /* Loop, subtracting SRHS if REMAINDER is greater and adding that to
2536      the total.  */
2537   for (;;) {
2538     int compare = tcCompare(remainder, srhs, parts);
2539     if (compare >= 0) {
2540       tcSubtract(remainder, srhs, 0, parts);
2541       lhs[n] |= mask;
2542     }
2543 
2544     if (shiftCount == 0)
2545       break;
2546     shiftCount--;
2547     tcShiftRight(srhs, parts, 1);
2548     if ((mask >>= 1) == 0) {
2549       mask = (WordType) 1 << (APINT_BITS_PER_WORD - 1);
2550       n--;
2551     }
2552   }
2553 
2554   return false;
2555 }
2556 
2557 /// Shift a bignum left Cound bits in-place. Shifted in bits are zero. There are
2558 /// no restrictions on Count.
2559 void APInt::tcShiftLeft(WordType *Dst, unsigned Words, unsigned Count) {
2560   // Don't bother performing a no-op shift.
2561   if (!Count)
2562     return;
2563 
2564   // WordShift is the inter-part shift; BitShift is the intra-part shift.
2565   unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words);
2566   unsigned BitShift = Count % APINT_BITS_PER_WORD;
2567 
2568   // Fastpath for moving by whole words.
2569   if (BitShift == 0) {
2570     std::memmove(Dst + WordShift, Dst, (Words - WordShift) * APINT_WORD_SIZE);
2571   } else {
2572     while (Words-- > WordShift) {
2573       Dst[Words] = Dst[Words - WordShift] << BitShift;
2574       if (Words > WordShift)
2575         Dst[Words] |=
2576           Dst[Words - WordShift - 1] >> (APINT_BITS_PER_WORD - BitShift);
2577     }
2578   }
2579 
2580   // Fill in the remainder with 0s.
2581   std::memset(Dst, 0, WordShift * APINT_WORD_SIZE);
2582 }
2583 
2584 /// Shift a bignum right Count bits in-place. Shifted in bits are zero. There
2585 /// are no restrictions on Count.
2586 void APInt::tcShiftRight(WordType *Dst, unsigned Words, unsigned Count) {
2587   // Don't bother performing a no-op shift.
2588   if (!Count)
2589     return;
2590 
2591   // WordShift is the inter-part shift; BitShift is the intra-part shift.
2592   unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words);
2593   unsigned BitShift = Count % APINT_BITS_PER_WORD;
2594 
2595   unsigned WordsToMove = Words - WordShift;
2596   // Fastpath for moving by whole words.
2597   if (BitShift == 0) {
2598     std::memmove(Dst, Dst + WordShift, WordsToMove * APINT_WORD_SIZE);
2599   } else {
2600     for (unsigned i = 0; i != WordsToMove; ++i) {
2601       Dst[i] = Dst[i + WordShift] >> BitShift;
2602       if (i + 1 != WordsToMove)
2603         Dst[i] |= Dst[i + WordShift + 1] << (APINT_BITS_PER_WORD - BitShift);
2604     }
2605   }
2606 
2607   // Fill in the remainder with 0s.
2608   std::memset(Dst + WordsToMove, 0, WordShift * APINT_WORD_SIZE);
2609 }
2610 
2611 /* Bitwise and of two bignums.  */
2612 void APInt::tcAnd(WordType *dst, const WordType *rhs, unsigned parts) {
2613   for (unsigned i = 0; i < parts; i++)
2614     dst[i] &= rhs[i];
2615 }
2616 
2617 /* Bitwise inclusive or of two bignums.  */
2618 void APInt::tcOr(WordType *dst, const WordType *rhs, unsigned parts) {
2619   for (unsigned i = 0; i < parts; i++)
2620     dst[i] |= rhs[i];
2621 }
2622 
2623 /* Bitwise exclusive or of two bignums.  */
2624 void APInt::tcXor(WordType *dst, const WordType *rhs, unsigned parts) {
2625   for (unsigned i = 0; i < parts; i++)
2626     dst[i] ^= rhs[i];
2627 }
2628 
2629 /* Complement a bignum in-place.  */
2630 void APInt::tcComplement(WordType *dst, unsigned parts) {
2631   for (unsigned i = 0; i < parts; i++)
2632     dst[i] = ~dst[i];
2633 }
2634 
2635 /* Comparison (unsigned) of two bignums.  */
2636 int APInt::tcCompare(const WordType *lhs, const WordType *rhs,
2637                      unsigned parts) {
2638   while (parts) {
2639     parts--;
2640     if (lhs[parts] != rhs[parts])
2641       return (lhs[parts] > rhs[parts]) ? 1 : -1;
2642   }
2643 
2644   return 0;
2645 }
2646 
2647 /* Set the least significant BITS bits of a bignum, clear the
2648    rest.  */
2649 void APInt::tcSetLeastSignificantBits(WordType *dst, unsigned parts,
2650                                       unsigned bits) {
2651   unsigned i = 0;
2652   while (bits > APINT_BITS_PER_WORD) {
2653     dst[i++] = ~(WordType) 0;
2654     bits -= APINT_BITS_PER_WORD;
2655   }
2656 
2657   if (bits)
2658     dst[i++] = ~(WordType) 0 >> (APINT_BITS_PER_WORD - bits);
2659 
2660   while (i < parts)
2661     dst[i++] = 0;
2662 }
2663 
2664 APInt llvm::APIntOps::RoundingUDiv(const APInt &A, const APInt &B,
2665                                    APInt::Rounding RM) {
2666   // Currently udivrem always rounds down.
2667   switch (RM) {
2668   case APInt::Rounding::DOWN:
2669   case APInt::Rounding::TOWARD_ZERO:
2670     return A.udiv(B);
2671   case APInt::Rounding::UP: {
2672     APInt Quo, Rem;
2673     APInt::udivrem(A, B, Quo, Rem);
2674     if (Rem == 0)
2675       return Quo;
2676     return Quo + 1;
2677   }
2678   }
2679   llvm_unreachable("Unknown APInt::Rounding enum");
2680 }
2681 
2682 APInt llvm::APIntOps::RoundingSDiv(const APInt &A, const APInt &B,
2683                                    APInt::Rounding RM) {
2684   switch (RM) {
2685   case APInt::Rounding::DOWN:
2686   case APInt::Rounding::UP: {
2687     APInt Quo, Rem;
2688     APInt::sdivrem(A, B, Quo, Rem);
2689     if (Rem == 0)
2690       return Quo;
2691     // This algorithm deals with arbitrary rounding mode used by sdivrem.
2692     // We want to check whether the non-integer part of the mathematical value
2693     // is negative or not. If the non-integer part is negative, we need to round
2694     // down from Quo; otherwise, if it's positive or 0, we return Quo, as it's
2695     // already rounded down.
2696     if (RM == APInt::Rounding::DOWN) {
2697       if (Rem.isNegative() != B.isNegative())
2698         return Quo - 1;
2699       return Quo;
2700     }
2701     if (Rem.isNegative() != B.isNegative())
2702       return Quo;
2703     return Quo + 1;
2704   }
2705   // Currently sdiv rounds twards zero.
2706   case APInt::Rounding::TOWARD_ZERO:
2707     return A.sdiv(B);
2708   }
2709   llvm_unreachable("Unknown APInt::Rounding enum");
2710 }
2711 
2712 Optional<APInt>
2713 llvm::APIntOps::SolveQuadraticEquationWrap(APInt A, APInt B, APInt C,
2714                                            unsigned RangeWidth) {
2715   unsigned CoeffWidth = A.getBitWidth();
2716   assert(CoeffWidth == B.getBitWidth() && CoeffWidth == C.getBitWidth());
2717   assert(RangeWidth <= CoeffWidth &&
2718          "Value range width should be less than coefficient width");
2719   assert(RangeWidth > 1 && "Value range bit width should be > 1");
2720 
2721   LLVM_DEBUG(dbgs() << __func__ << ": solving " << A << "x^2 + " << B
2722                     << "x + " << C << ", rw:" << RangeWidth << '\n');
2723 
2724   // Identify 0 as a (non)solution immediately.
2725   if (C.sextOrTrunc(RangeWidth).isNullValue() ) {
2726     LLVM_DEBUG(dbgs() << __func__ << ": zero solution\n");
2727     return APInt(CoeffWidth, 0);
2728   }
2729 
2730   // The result of APInt arithmetic has the same bit width as the operands,
2731   // so it can actually lose high bits. A product of two n-bit integers needs
2732   // 2n-1 bits to represent the full value.
2733   // The operation done below (on quadratic coefficients) that can produce
2734   // the largest value is the evaluation of the equation during bisection,
2735   // which needs 3 times the bitwidth of the coefficient, so the total number
2736   // of required bits is 3n.
2737   //
2738   // The purpose of this extension is to simulate the set Z of all integers,
2739   // where n+1 > n for all n in Z. In Z it makes sense to talk about positive
2740   // and negative numbers (not so much in a modulo arithmetic). The method
2741   // used to solve the equation is based on the standard formula for real
2742   // numbers, and uses the concepts of "positive" and "negative" with their
2743   // usual meanings.
2744   CoeffWidth *= 3;
2745   A = A.sext(CoeffWidth);
2746   B = B.sext(CoeffWidth);
2747   C = C.sext(CoeffWidth);
2748 
2749   // Make A > 0 for simplicity. Negate cannot overflow at this point because
2750   // the bit width has increased.
2751   if (A.isNegative()) {
2752     A.negate();
2753     B.negate();
2754     C.negate();
2755   }
2756 
2757   // Solving an equation q(x) = 0 with coefficients in modular arithmetic
2758   // is really solving a set of equations q(x) = kR for k = 0, 1, 2, ...,
2759   // and R = 2^BitWidth.
2760   // Since we're trying not only to find exact solutions, but also values
2761   // that "wrap around", such a set will always have a solution, i.e. an x
2762   // that satisfies at least one of the equations, or such that |q(x)|
2763   // exceeds kR, while |q(x-1)| for the same k does not.
2764   //
2765   // We need to find a value k, such that Ax^2 + Bx + C = kR will have a
2766   // positive solution n (in the above sense), and also such that the n
2767   // will be the least among all solutions corresponding to k = 0, 1, ...
2768   // (more precisely, the least element in the set
2769   //   { n(k) | k is such that a solution n(k) exists }).
2770   //
2771   // Consider the parabola (over real numbers) that corresponds to the
2772   // quadratic equation. Since A > 0, the arms of the parabola will point
2773   // up. Picking different values of k will shift it up and down by R.
2774   //
2775   // We want to shift the parabola in such a way as to reduce the problem
2776   // of solving q(x) = kR to solving shifted_q(x) = 0.
2777   // (The interesting solutions are the ceilings of the real number
2778   // solutions.)
2779   APInt R = APInt::getOneBitSet(CoeffWidth, RangeWidth);
2780   APInt TwoA = 2 * A;
2781   APInt SqrB = B * B;
2782   bool PickLow;
2783 
2784   auto RoundUp = [] (const APInt &V, const APInt &A) -> APInt {
2785     assert(A.isStrictlyPositive());
2786     APInt T = V.abs().urem(A);
2787     if (T.isNullValue())
2788       return V;
2789     return V.isNegative() ? V+T : V+(A-T);
2790   };
2791 
2792   // The vertex of the parabola is at -B/2A, but since A > 0, it's negative
2793   // iff B is positive.
2794   if (B.isNonNegative()) {
2795     // If B >= 0, the vertex it at a negative location (or at 0), so in
2796     // order to have a non-negative solution we need to pick k that makes
2797     // C-kR negative. To satisfy all the requirements for the solution
2798     // that we are looking for, it needs to be closest to 0 of all k.
2799     C = C.srem(R);
2800     if (C.isStrictlyPositive())
2801       C -= R;
2802     // Pick the greater solution.
2803     PickLow = false;
2804   } else {
2805     // If B < 0, the vertex is at a positive location. For any solution
2806     // to exist, the discriminant must be non-negative. This means that
2807     // C-kR <= B^2/4A is a necessary condition for k, i.e. there is a
2808     // lower bound on values of k: kR >= C - B^2/4A.
2809     APInt LowkR = C - SqrB.udiv(2*TwoA); // udiv because all values > 0.
2810     // Round LowkR up (towards +inf) to the nearest kR.
2811     LowkR = RoundUp(LowkR, R);
2812 
2813     // If there exists k meeting the condition above, and such that
2814     // C-kR > 0, there will be two positive real number solutions of
2815     // q(x) = kR. Out of all such values of k, pick the one that makes
2816     // C-kR closest to 0, (i.e. pick maximum k such that C-kR > 0).
2817     // In other words, find maximum k such that LowkR <= kR < C.
2818     if (C.sgt(LowkR)) {
2819       // If LowkR < C, then such a k is guaranteed to exist because
2820       // LowkR itself is a multiple of R.
2821       C -= -RoundUp(-C, R);      // C = C - RoundDown(C, R)
2822       // Pick the smaller solution.
2823       PickLow = true;
2824     } else {
2825       // If C-kR < 0 for all potential k's, it means that one solution
2826       // will be negative, while the other will be positive. The positive
2827       // solution will shift towards 0 if the parabola is moved up.
2828       // Pick the kR closest to the lower bound (i.e. make C-kR closest
2829       // to 0, or in other words, out of all parabolas that have solutions,
2830       // pick the one that is the farthest "up").
2831       // Since LowkR is itself a multiple of R, simply take C-LowkR.
2832       C -= LowkR;
2833       // Pick the greater solution.
2834       PickLow = false;
2835     }
2836   }
2837 
2838   LLVM_DEBUG(dbgs() << __func__ << ": updated coefficients " << A << "x^2 + "
2839                     << B << "x + " << C << ", rw:" << RangeWidth << '\n');
2840 
2841   APInt D = SqrB - 4*A*C;
2842   assert(D.isNonNegative() && "Negative discriminant");
2843   APInt SQ = D.sqrt();
2844 
2845   APInt Q = SQ * SQ;
2846   bool InexactSQ = Q != D;
2847   // The calculated SQ may actually be greater than the exact (non-integer)
2848   // value. If that's the case, decremement SQ to get a value that is lower.
2849   if (Q.sgt(D))
2850     SQ -= 1;
2851 
2852   APInt X;
2853   APInt Rem;
2854 
2855   // SQ is rounded down (i.e SQ * SQ <= D), so the roots may be inexact.
2856   // When using the quadratic formula directly, the calculated low root
2857   // may be greater than the exact one, since we would be subtracting SQ.
2858   // To make sure that the calculated root is not greater than the exact
2859   // one, subtract SQ+1 when calculating the low root (for inexact value
2860   // of SQ).
2861   if (PickLow)
2862     APInt::sdivrem(-B - (SQ+InexactSQ), TwoA, X, Rem);
2863   else
2864     APInt::sdivrem(-B + SQ, TwoA, X, Rem);
2865 
2866   // The updated coefficients should be such that the (exact) solution is
2867   // positive. Since APInt division rounds towards 0, the calculated one
2868   // can be 0, but cannot be negative.
2869   assert(X.isNonNegative() && "Solution should be non-negative");
2870 
2871   if (!InexactSQ && Rem.isNullValue()) {
2872     LLVM_DEBUG(dbgs() << __func__ << ": solution (root): " << X << '\n');
2873     return X;
2874   }
2875 
2876   assert((SQ*SQ).sle(D) && "SQ = |_sqrt(D)_|, so SQ*SQ <= D");
2877   // The exact value of the square root of D should be between SQ and SQ+1.
2878   // This implies that the solution should be between that corresponding to
2879   // SQ (i.e. X) and that corresponding to SQ+1.
2880   //
2881   // The calculated X cannot be greater than the exact (real) solution.
2882   // Actually it must be strictly less than the exact solution, while
2883   // X+1 will be greater than or equal to it.
2884 
2885   APInt VX = (A*X + B)*X + C;
2886   APInt VY = VX + TwoA*X + A + B;
2887   bool SignChange = VX.isNegative() != VY.isNegative() ||
2888                     VX.isNullValue() != VY.isNullValue();
2889   // If the sign did not change between X and X+1, X is not a valid solution.
2890   // This could happen when the actual (exact) roots don't have an integer
2891   // between them, so they would both be contained between X and X+1.
2892   if (!SignChange) {
2893     LLVM_DEBUG(dbgs() << __func__ << ": no valid solution\n");
2894     return None;
2895   }
2896 
2897   X += 1;
2898   LLVM_DEBUG(dbgs() << __func__ << ": solution (wrap): " << X << '\n');
2899   return X;
2900 }
2901