1 //===-- APInt.cpp - Implement APInt class ---------------------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements a class to represent arbitrary precision integer
11 // constant values and provide a variety of arithmetic operations on them.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "llvm/ADT/APInt.h"
16 #include "llvm/ADT/ArrayRef.h"
17 #include "llvm/ADT/FoldingSet.h"
18 #include "llvm/ADT/Hashing.h"
19 #include "llvm/ADT/SmallString.h"
20 #include "llvm/ADT/StringRef.h"
21 #include "llvm/Config/llvm-config.h"
22 #include "llvm/Support/Debug.h"
23 #include "llvm/Support/ErrorHandling.h"
24 #include "llvm/Support/MathExtras.h"
25 #include "llvm/Support/raw_ostream.h"
26 #include <climits>
27 #include <cmath>
28 #include <cstdlib>
29 #include <cstring>
30 using namespace llvm;
31 
32 #define DEBUG_TYPE "apint"
33 
34 /// A utility function for allocating memory, checking for allocation failures,
35 /// and ensuring the contents are zeroed.
36 inline static uint64_t* getClearedMemory(unsigned numWords) {
37   uint64_t *result = new uint64_t[numWords];
38   memset(result, 0, numWords * sizeof(uint64_t));
39   return result;
40 }
41 
42 /// A utility function for allocating memory and checking for allocation
43 /// failure.  The content is not zeroed.
44 inline static uint64_t* getMemory(unsigned numWords) {
45   return new uint64_t[numWords];
46 }
47 
48 /// A utility function that converts a character to a digit.
49 inline static unsigned getDigit(char cdigit, uint8_t radix) {
50   unsigned r;
51 
52   if (radix == 16 || radix == 36) {
53     r = cdigit - '0';
54     if (r <= 9)
55       return r;
56 
57     r = cdigit - 'A';
58     if (r <= radix - 11U)
59       return r + 10;
60 
61     r = cdigit - 'a';
62     if (r <= radix - 11U)
63       return r + 10;
64 
65     radix = 10;
66   }
67 
68   r = cdigit - '0';
69   if (r < radix)
70     return r;
71 
72   return -1U;
73 }
74 
75 
76 void APInt::initSlowCase(uint64_t val, bool isSigned) {
77   U.pVal = getClearedMemory(getNumWords());
78   U.pVal[0] = val;
79   if (isSigned && int64_t(val) < 0)
80     for (unsigned i = 1; i < getNumWords(); ++i)
81       U.pVal[i] = WORD_MAX;
82   clearUnusedBits();
83 }
84 
85 void APInt::initSlowCase(const APInt& that) {
86   U.pVal = getMemory(getNumWords());
87   memcpy(U.pVal, that.U.pVal, getNumWords() * APINT_WORD_SIZE);
88 }
89 
90 void APInt::initFromArray(ArrayRef<uint64_t> bigVal) {
91   assert(BitWidth && "Bitwidth too small");
92   assert(bigVal.data() && "Null pointer detected!");
93   if (isSingleWord())
94     U.VAL = bigVal[0];
95   else {
96     // Get memory, cleared to 0
97     U.pVal = getClearedMemory(getNumWords());
98     // Calculate the number of words to copy
99     unsigned words = std::min<unsigned>(bigVal.size(), getNumWords());
100     // Copy the words from bigVal to pVal
101     memcpy(U.pVal, bigVal.data(), words * APINT_WORD_SIZE);
102   }
103   // Make sure unused high bits are cleared
104   clearUnusedBits();
105 }
106 
107 APInt::APInt(unsigned numBits, ArrayRef<uint64_t> bigVal)
108   : BitWidth(numBits) {
109   initFromArray(bigVal);
110 }
111 
112 APInt::APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[])
113   : BitWidth(numBits) {
114   initFromArray(makeArrayRef(bigVal, numWords));
115 }
116 
117 APInt::APInt(unsigned numbits, StringRef Str, uint8_t radix)
118   : BitWidth(numbits) {
119   assert(BitWidth && "Bitwidth too small");
120   fromString(numbits, Str, radix);
121 }
122 
123 void APInt::reallocate(unsigned NewBitWidth) {
124   // If the number of words is the same we can just change the width and stop.
125   if (getNumWords() == getNumWords(NewBitWidth)) {
126     BitWidth = NewBitWidth;
127     return;
128   }
129 
130   // If we have an allocation, delete it.
131   if (!isSingleWord())
132     delete [] U.pVal;
133 
134   // Update BitWidth.
135   BitWidth = NewBitWidth;
136 
137   // If we are supposed to have an allocation, create it.
138   if (!isSingleWord())
139     U.pVal = getMemory(getNumWords());
140 }
141 
142 void APInt::AssignSlowCase(const APInt& RHS) {
143   // Don't do anything for X = X
144   if (this == &RHS)
145     return;
146 
147   // Adjust the bit width and handle allocations as necessary.
148   reallocate(RHS.getBitWidth());
149 
150   // Copy the data.
151   if (isSingleWord())
152     U.VAL = RHS.U.VAL;
153   else
154     memcpy(U.pVal, RHS.U.pVal, getNumWords() * APINT_WORD_SIZE);
155 }
156 
157 /// This method 'profiles' an APInt for use with FoldingSet.
158 void APInt::Profile(FoldingSetNodeID& ID) const {
159   ID.AddInteger(BitWidth);
160 
161   if (isSingleWord()) {
162     ID.AddInteger(U.VAL);
163     return;
164   }
165 
166   unsigned NumWords = getNumWords();
167   for (unsigned i = 0; i < NumWords; ++i)
168     ID.AddInteger(U.pVal[i]);
169 }
170 
171 /// Prefix increment operator. Increments the APInt by one.
172 APInt& APInt::operator++() {
173   if (isSingleWord())
174     ++U.VAL;
175   else
176     tcIncrement(U.pVal, getNumWords());
177   return clearUnusedBits();
178 }
179 
180 /// Prefix decrement operator. Decrements the APInt by one.
181 APInt& APInt::operator--() {
182   if (isSingleWord())
183     --U.VAL;
184   else
185     tcDecrement(U.pVal, getNumWords());
186   return clearUnusedBits();
187 }
188 
189 /// Adds the RHS APint to this APInt.
190 /// @returns this, after addition of RHS.
191 /// Addition assignment operator.
192 APInt& APInt::operator+=(const APInt& RHS) {
193   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
194   if (isSingleWord())
195     U.VAL += RHS.U.VAL;
196   else
197     tcAdd(U.pVal, RHS.U.pVal, 0, getNumWords());
198   return clearUnusedBits();
199 }
200 
201 APInt& APInt::operator+=(uint64_t RHS) {
202   if (isSingleWord())
203     U.VAL += RHS;
204   else
205     tcAddPart(U.pVal, RHS, getNumWords());
206   return clearUnusedBits();
207 }
208 
209 /// Subtracts the RHS APInt from this APInt
210 /// @returns this, after subtraction
211 /// Subtraction assignment operator.
212 APInt& APInt::operator-=(const APInt& RHS) {
213   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
214   if (isSingleWord())
215     U.VAL -= RHS.U.VAL;
216   else
217     tcSubtract(U.pVal, RHS.U.pVal, 0, getNumWords());
218   return clearUnusedBits();
219 }
220 
221 APInt& APInt::operator-=(uint64_t RHS) {
222   if (isSingleWord())
223     U.VAL -= RHS;
224   else
225     tcSubtractPart(U.pVal, RHS, getNumWords());
226   return clearUnusedBits();
227 }
228 
229 APInt APInt::operator*(const APInt& RHS) const {
230   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
231   if (isSingleWord())
232     return APInt(BitWidth, U.VAL * RHS.U.VAL);
233 
234   APInt Result(getMemory(getNumWords()), getBitWidth());
235 
236   tcMultiply(Result.U.pVal, U.pVal, RHS.U.pVal, getNumWords());
237 
238   Result.clearUnusedBits();
239   return Result;
240 }
241 
242 void APInt::AndAssignSlowCase(const APInt& RHS) {
243   tcAnd(U.pVal, RHS.U.pVal, getNumWords());
244 }
245 
246 void APInt::OrAssignSlowCase(const APInt& RHS) {
247   tcOr(U.pVal, RHS.U.pVal, getNumWords());
248 }
249 
250 void APInt::XorAssignSlowCase(const APInt& RHS) {
251   tcXor(U.pVal, RHS.U.pVal, getNumWords());
252 }
253 
254 APInt& APInt::operator*=(const APInt& RHS) {
255   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
256   *this = *this * RHS;
257   return *this;
258 }
259 
260 APInt& APInt::operator*=(uint64_t RHS) {
261   if (isSingleWord()) {
262     U.VAL *= RHS;
263   } else {
264     unsigned NumWords = getNumWords();
265     tcMultiplyPart(U.pVal, U.pVal, RHS, 0, NumWords, NumWords, false);
266   }
267   return clearUnusedBits();
268 }
269 
270 bool APInt::EqualSlowCase(const APInt& RHS) const {
271   return std::equal(U.pVal, U.pVal + getNumWords(), RHS.U.pVal);
272 }
273 
274 int APInt::compare(const APInt& RHS) const {
275   assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
276   if (isSingleWord())
277     return U.VAL < RHS.U.VAL ? -1 : U.VAL > RHS.U.VAL;
278 
279   return tcCompare(U.pVal, RHS.U.pVal, getNumWords());
280 }
281 
282 int APInt::compareSigned(const APInt& RHS) const {
283   assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
284   if (isSingleWord()) {
285     int64_t lhsSext = SignExtend64(U.VAL, BitWidth);
286     int64_t rhsSext = SignExtend64(RHS.U.VAL, BitWidth);
287     return lhsSext < rhsSext ? -1 : lhsSext > rhsSext;
288   }
289 
290   bool lhsNeg = isNegative();
291   bool rhsNeg = RHS.isNegative();
292 
293   // If the sign bits don't match, then (LHS < RHS) if LHS is negative
294   if (lhsNeg != rhsNeg)
295     return lhsNeg ? -1 : 1;
296 
297   // Otherwise we can just use an unsigned comparison, because even negative
298   // numbers compare correctly this way if both have the same signed-ness.
299   return tcCompare(U.pVal, RHS.U.pVal, getNumWords());
300 }
301 
302 void APInt::setBitsSlowCase(unsigned loBit, unsigned hiBit) {
303   unsigned loWord = whichWord(loBit);
304   unsigned hiWord = whichWord(hiBit);
305 
306   // Create an initial mask for the low word with zeros below loBit.
307   uint64_t loMask = WORD_MAX << whichBit(loBit);
308 
309   // If hiBit is not aligned, we need a high mask.
310   unsigned hiShiftAmt = whichBit(hiBit);
311   if (hiShiftAmt != 0) {
312     // Create a high mask with zeros above hiBit.
313     uint64_t hiMask = WORD_MAX >> (APINT_BITS_PER_WORD - hiShiftAmt);
314     // If loWord and hiWord are equal, then we combine the masks. Otherwise,
315     // set the bits in hiWord.
316     if (hiWord == loWord)
317       loMask &= hiMask;
318     else
319       U.pVal[hiWord] |= hiMask;
320   }
321   // Apply the mask to the low word.
322   U.pVal[loWord] |= loMask;
323 
324   // Fill any words between loWord and hiWord with all ones.
325   for (unsigned word = loWord + 1; word < hiWord; ++word)
326     U.pVal[word] = WORD_MAX;
327 }
328 
329 /// Toggle every bit to its opposite value.
330 void APInt::flipAllBitsSlowCase() {
331   tcComplement(U.pVal, getNumWords());
332   clearUnusedBits();
333 }
334 
335 /// Toggle a given bit to its opposite value whose position is given
336 /// as "bitPosition".
337 /// Toggles a given bit to its opposite value.
338 void APInt::flipBit(unsigned bitPosition) {
339   assert(bitPosition < BitWidth && "Out of the bit-width range!");
340   if ((*this)[bitPosition]) clearBit(bitPosition);
341   else setBit(bitPosition);
342 }
343 
344 void APInt::insertBits(const APInt &subBits, unsigned bitPosition) {
345   unsigned subBitWidth = subBits.getBitWidth();
346   assert(0 < subBitWidth && (subBitWidth + bitPosition) <= BitWidth &&
347          "Illegal bit insertion");
348 
349   // Insertion is a direct copy.
350   if (subBitWidth == BitWidth) {
351     *this = subBits;
352     return;
353   }
354 
355   // Single word result can be done as a direct bitmask.
356   if (isSingleWord()) {
357     uint64_t mask = WORD_MAX >> (APINT_BITS_PER_WORD - subBitWidth);
358     U.VAL &= ~(mask << bitPosition);
359     U.VAL |= (subBits.U.VAL << bitPosition);
360     return;
361   }
362 
363   unsigned loBit = whichBit(bitPosition);
364   unsigned loWord = whichWord(bitPosition);
365   unsigned hi1Word = whichWord(bitPosition + subBitWidth - 1);
366 
367   // Insertion within a single word can be done as a direct bitmask.
368   if (loWord == hi1Word) {
369     uint64_t mask = WORD_MAX >> (APINT_BITS_PER_WORD - subBitWidth);
370     U.pVal[loWord] &= ~(mask << loBit);
371     U.pVal[loWord] |= (subBits.U.VAL << loBit);
372     return;
373   }
374 
375   // Insert on word boundaries.
376   if (loBit == 0) {
377     // Direct copy whole words.
378     unsigned numWholeSubWords = subBitWidth / APINT_BITS_PER_WORD;
379     memcpy(U.pVal + loWord, subBits.getRawData(),
380            numWholeSubWords * APINT_WORD_SIZE);
381 
382     // Mask+insert remaining bits.
383     unsigned remainingBits = subBitWidth % APINT_BITS_PER_WORD;
384     if (remainingBits != 0) {
385       uint64_t mask = WORD_MAX >> (APINT_BITS_PER_WORD - remainingBits);
386       U.pVal[hi1Word] &= ~mask;
387       U.pVal[hi1Word] |= subBits.getWord(subBitWidth - 1);
388     }
389     return;
390   }
391 
392   // General case - set/clear individual bits in dst based on src.
393   // TODO - there is scope for optimization here, but at the moment this code
394   // path is barely used so prefer readability over performance.
395   for (unsigned i = 0; i != subBitWidth; ++i) {
396     if (subBits[i])
397       setBit(bitPosition + i);
398     else
399       clearBit(bitPosition + i);
400   }
401 }
402 
403 APInt APInt::extractBits(unsigned numBits, unsigned bitPosition) const {
404   assert(numBits > 0 && "Can't extract zero bits");
405   assert(bitPosition < BitWidth && (numBits + bitPosition) <= BitWidth &&
406          "Illegal bit extraction");
407 
408   if (isSingleWord())
409     return APInt(numBits, U.VAL >> bitPosition);
410 
411   unsigned loBit = whichBit(bitPosition);
412   unsigned loWord = whichWord(bitPosition);
413   unsigned hiWord = whichWord(bitPosition + numBits - 1);
414 
415   // Single word result extracting bits from a single word source.
416   if (loWord == hiWord)
417     return APInt(numBits, U.pVal[loWord] >> loBit);
418 
419   // Extracting bits that start on a source word boundary can be done
420   // as a fast memory copy.
421   if (loBit == 0)
422     return APInt(numBits, makeArrayRef(U.pVal + loWord, 1 + hiWord - loWord));
423 
424   // General case - shift + copy source words directly into place.
425   APInt Result(numBits, 0);
426   unsigned NumSrcWords = getNumWords();
427   unsigned NumDstWords = Result.getNumWords();
428 
429   uint64_t *DestPtr = Result.isSingleWord() ? &Result.U.VAL : Result.U.pVal;
430   for (unsigned word = 0; word < NumDstWords; ++word) {
431     uint64_t w0 = U.pVal[loWord + word];
432     uint64_t w1 =
433         (loWord + word + 1) < NumSrcWords ? U.pVal[loWord + word + 1] : 0;
434     DestPtr[word] = (w0 >> loBit) | (w1 << (APINT_BITS_PER_WORD - loBit));
435   }
436 
437   return Result.clearUnusedBits();
438 }
439 
440 unsigned APInt::getBitsNeeded(StringRef str, uint8_t radix) {
441   assert(!str.empty() && "Invalid string length");
442   assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 ||
443           radix == 36) &&
444          "Radix should be 2, 8, 10, 16, or 36!");
445 
446   size_t slen = str.size();
447 
448   // Each computation below needs to know if it's negative.
449   StringRef::iterator p = str.begin();
450   unsigned isNegative = *p == '-';
451   if (*p == '-' || *p == '+') {
452     p++;
453     slen--;
454     assert(slen && "String is only a sign, needs a value.");
455   }
456 
457   // For radixes of power-of-two values, the bits required is accurately and
458   // easily computed
459   if (radix == 2)
460     return slen + isNegative;
461   if (radix == 8)
462     return slen * 3 + isNegative;
463   if (radix == 16)
464     return slen * 4 + isNegative;
465 
466   // FIXME: base 36
467 
468   // This is grossly inefficient but accurate. We could probably do something
469   // with a computation of roughly slen*64/20 and then adjust by the value of
470   // the first few digits. But, I'm not sure how accurate that could be.
471 
472   // Compute a sufficient number of bits that is always large enough but might
473   // be too large. This avoids the assertion in the constructor. This
474   // calculation doesn't work appropriately for the numbers 0-9, so just use 4
475   // bits in that case.
476   unsigned sufficient
477     = radix == 10? (slen == 1 ? 4 : slen * 64/18)
478                  : (slen == 1 ? 7 : slen * 16/3);
479 
480   // Convert to the actual binary value.
481   APInt tmp(sufficient, StringRef(p, slen), radix);
482 
483   // Compute how many bits are required. If the log is infinite, assume we need
484   // just bit.
485   unsigned log = tmp.logBase2();
486   if (log == (unsigned)-1) {
487     return isNegative + 1;
488   } else {
489     return isNegative + log + 1;
490   }
491 }
492 
493 hash_code llvm::hash_value(const APInt &Arg) {
494   if (Arg.isSingleWord())
495     return hash_combine(Arg.U.VAL);
496 
497   return hash_combine_range(Arg.U.pVal, Arg.U.pVal + Arg.getNumWords());
498 }
499 
500 bool APInt::isSplat(unsigned SplatSizeInBits) const {
501   assert(getBitWidth() % SplatSizeInBits == 0 &&
502          "SplatSizeInBits must divide width!");
503   // We can check that all parts of an integer are equal by making use of a
504   // little trick: rotate and check if it's still the same value.
505   return *this == rotl(SplatSizeInBits);
506 }
507 
508 /// This function returns the high "numBits" bits of this APInt.
509 APInt APInt::getHiBits(unsigned numBits) const {
510   return this->lshr(BitWidth - numBits);
511 }
512 
513 /// This function returns the low "numBits" bits of this APInt.
514 APInt APInt::getLoBits(unsigned numBits) const {
515   APInt Result(getLowBitsSet(BitWidth, numBits));
516   Result &= *this;
517   return Result;
518 }
519 
520 /// Return a value containing V broadcasted over NewLen bits.
521 APInt APInt::getSplat(unsigned NewLen, const APInt &V) {
522   assert(NewLen >= V.getBitWidth() && "Can't splat to smaller bit width!");
523 
524   APInt Val = V.zextOrSelf(NewLen);
525   for (unsigned I = V.getBitWidth(); I < NewLen; I <<= 1)
526     Val |= Val << I;
527 
528   return Val;
529 }
530 
531 unsigned APInt::countLeadingZerosSlowCase() const {
532   unsigned Count = 0;
533   for (int i = getNumWords()-1; i >= 0; --i) {
534     uint64_t V = U.pVal[i];
535     if (V == 0)
536       Count += APINT_BITS_PER_WORD;
537     else {
538       Count += llvm::countLeadingZeros(V);
539       break;
540     }
541   }
542   // Adjust for unused bits in the most significant word (they are zero).
543   unsigned Mod = BitWidth % APINT_BITS_PER_WORD;
544   Count -= Mod > 0 ? APINT_BITS_PER_WORD - Mod : 0;
545   return Count;
546 }
547 
548 unsigned APInt::countLeadingOnesSlowCase() const {
549   unsigned highWordBits = BitWidth % APINT_BITS_PER_WORD;
550   unsigned shift;
551   if (!highWordBits) {
552     highWordBits = APINT_BITS_PER_WORD;
553     shift = 0;
554   } else {
555     shift = APINT_BITS_PER_WORD - highWordBits;
556   }
557   int i = getNumWords() - 1;
558   unsigned Count = llvm::countLeadingOnes(U.pVal[i] << shift);
559   if (Count == highWordBits) {
560     for (i--; i >= 0; --i) {
561       if (U.pVal[i] == WORD_MAX)
562         Count += APINT_BITS_PER_WORD;
563       else {
564         Count += llvm::countLeadingOnes(U.pVal[i]);
565         break;
566       }
567     }
568   }
569   return Count;
570 }
571 
572 unsigned APInt::countTrailingZerosSlowCase() const {
573   unsigned Count = 0;
574   unsigned i = 0;
575   for (; i < getNumWords() && U.pVal[i] == 0; ++i)
576     Count += APINT_BITS_PER_WORD;
577   if (i < getNumWords())
578     Count += llvm::countTrailingZeros(U.pVal[i]);
579   return std::min(Count, BitWidth);
580 }
581 
582 unsigned APInt::countTrailingOnesSlowCase() const {
583   unsigned Count = 0;
584   unsigned i = 0;
585   for (; i < getNumWords() && U.pVal[i] == WORD_MAX; ++i)
586     Count += APINT_BITS_PER_WORD;
587   if (i < getNumWords())
588     Count += llvm::countTrailingOnes(U.pVal[i]);
589   assert(Count <= BitWidth);
590   return Count;
591 }
592 
593 unsigned APInt::countPopulationSlowCase() const {
594   unsigned Count = 0;
595   for (unsigned i = 0; i < getNumWords(); ++i)
596     Count += llvm::countPopulation(U.pVal[i]);
597   return Count;
598 }
599 
600 bool APInt::intersectsSlowCase(const APInt &RHS) const {
601   for (unsigned i = 0, e = getNumWords(); i != e; ++i)
602     if ((U.pVal[i] & RHS.U.pVal[i]) != 0)
603       return true;
604 
605   return false;
606 }
607 
608 bool APInt::isSubsetOfSlowCase(const APInt &RHS) const {
609   for (unsigned i = 0, e = getNumWords(); i != e; ++i)
610     if ((U.pVal[i] & ~RHS.U.pVal[i]) != 0)
611       return false;
612 
613   return true;
614 }
615 
616 APInt APInt::byteSwap() const {
617   assert(BitWidth >= 16 && BitWidth % 16 == 0 && "Cannot byteswap!");
618   if (BitWidth == 16)
619     return APInt(BitWidth, ByteSwap_16(uint16_t(U.VAL)));
620   if (BitWidth == 32)
621     return APInt(BitWidth, ByteSwap_32(unsigned(U.VAL)));
622   if (BitWidth == 48) {
623     unsigned Tmp1 = unsigned(U.VAL >> 16);
624     Tmp1 = ByteSwap_32(Tmp1);
625     uint16_t Tmp2 = uint16_t(U.VAL);
626     Tmp2 = ByteSwap_16(Tmp2);
627     return APInt(BitWidth, (uint64_t(Tmp2) << 32) | Tmp1);
628   }
629   if (BitWidth == 64)
630     return APInt(BitWidth, ByteSwap_64(U.VAL));
631 
632   APInt Result(getNumWords() * APINT_BITS_PER_WORD, 0);
633   for (unsigned I = 0, N = getNumWords(); I != N; ++I)
634     Result.U.pVal[I] = ByteSwap_64(U.pVal[N - I - 1]);
635   if (Result.BitWidth != BitWidth) {
636     Result.lshrInPlace(Result.BitWidth - BitWidth);
637     Result.BitWidth = BitWidth;
638   }
639   return Result;
640 }
641 
642 APInt APInt::reverseBits() const {
643   switch (BitWidth) {
644   case 64:
645     return APInt(BitWidth, llvm::reverseBits<uint64_t>(U.VAL));
646   case 32:
647     return APInt(BitWidth, llvm::reverseBits<uint32_t>(U.VAL));
648   case 16:
649     return APInt(BitWidth, llvm::reverseBits<uint16_t>(U.VAL));
650   case 8:
651     return APInt(BitWidth, llvm::reverseBits<uint8_t>(U.VAL));
652   default:
653     break;
654   }
655 
656   APInt Val(*this);
657   APInt Reversed(BitWidth, 0);
658   unsigned S = BitWidth;
659 
660   for (; Val != 0; Val.lshrInPlace(1)) {
661     Reversed <<= 1;
662     Reversed |= Val[0];
663     --S;
664   }
665 
666   Reversed <<= S;
667   return Reversed;
668 }
669 
670 APInt llvm::APIntOps::GreatestCommonDivisor(APInt A, APInt B) {
671   // Fast-path a common case.
672   if (A == B) return A;
673 
674   // Corner cases: if either operand is zero, the other is the gcd.
675   if (!A) return B;
676   if (!B) return A;
677 
678   // Count common powers of 2 and remove all other powers of 2.
679   unsigned Pow2;
680   {
681     unsigned Pow2_A = A.countTrailingZeros();
682     unsigned Pow2_B = B.countTrailingZeros();
683     if (Pow2_A > Pow2_B) {
684       A.lshrInPlace(Pow2_A - Pow2_B);
685       Pow2 = Pow2_B;
686     } else if (Pow2_B > Pow2_A) {
687       B.lshrInPlace(Pow2_B - Pow2_A);
688       Pow2 = Pow2_A;
689     } else {
690       Pow2 = Pow2_A;
691     }
692   }
693 
694   // Both operands are odd multiples of 2^Pow_2:
695   //
696   //   gcd(a, b) = gcd(|a - b| / 2^i, min(a, b))
697   //
698   // This is a modified version of Stein's algorithm, taking advantage of
699   // efficient countTrailingZeros().
700   while (A != B) {
701     if (A.ugt(B)) {
702       A -= B;
703       A.lshrInPlace(A.countTrailingZeros() - Pow2);
704     } else {
705       B -= A;
706       B.lshrInPlace(B.countTrailingZeros() - Pow2);
707     }
708   }
709 
710   return A;
711 }
712 
713 APInt llvm::APIntOps::RoundDoubleToAPInt(double Double, unsigned width) {
714   union {
715     double D;
716     uint64_t I;
717   } T;
718   T.D = Double;
719 
720   // Get the sign bit from the highest order bit
721   bool isNeg = T.I >> 63;
722 
723   // Get the 11-bit exponent and adjust for the 1023 bit bias
724   int64_t exp = ((T.I >> 52) & 0x7ff) - 1023;
725 
726   // If the exponent is negative, the value is < 0 so just return 0.
727   if (exp < 0)
728     return APInt(width, 0u);
729 
730   // Extract the mantissa by clearing the top 12 bits (sign + exponent).
731   uint64_t mantissa = (T.I & (~0ULL >> 12)) | 1ULL << 52;
732 
733   // If the exponent doesn't shift all bits out of the mantissa
734   if (exp < 52)
735     return isNeg ? -APInt(width, mantissa >> (52 - exp)) :
736                     APInt(width, mantissa >> (52 - exp));
737 
738   // If the client didn't provide enough bits for us to shift the mantissa into
739   // then the result is undefined, just return 0
740   if (width <= exp - 52)
741     return APInt(width, 0);
742 
743   // Otherwise, we have to shift the mantissa bits up to the right location
744   APInt Tmp(width, mantissa);
745   Tmp <<= (unsigned)exp - 52;
746   return isNeg ? -Tmp : Tmp;
747 }
748 
749 /// This function converts this APInt to a double.
750 /// The layout for double is as following (IEEE Standard 754):
751 ///  --------------------------------------
752 /// |  Sign    Exponent    Fraction    Bias |
753 /// |-------------------------------------- |
754 /// |  1[63]   11[62-52]   52[51-00]   1023 |
755 ///  --------------------------------------
756 double APInt::roundToDouble(bool isSigned) const {
757 
758   // Handle the simple case where the value is contained in one uint64_t.
759   // It is wrong to optimize getWord(0) to VAL; there might be more than one word.
760   if (isSingleWord() || getActiveBits() <= APINT_BITS_PER_WORD) {
761     if (isSigned) {
762       int64_t sext = SignExtend64(getWord(0), BitWidth);
763       return double(sext);
764     } else
765       return double(getWord(0));
766   }
767 
768   // Determine if the value is negative.
769   bool isNeg = isSigned ? (*this)[BitWidth-1] : false;
770 
771   // Construct the absolute value if we're negative.
772   APInt Tmp(isNeg ? -(*this) : (*this));
773 
774   // Figure out how many bits we're using.
775   unsigned n = Tmp.getActiveBits();
776 
777   // The exponent (without bias normalization) is just the number of bits
778   // we are using. Note that the sign bit is gone since we constructed the
779   // absolute value.
780   uint64_t exp = n;
781 
782   // Return infinity for exponent overflow
783   if (exp > 1023) {
784     if (!isSigned || !isNeg)
785       return std::numeric_limits<double>::infinity();
786     else
787       return -std::numeric_limits<double>::infinity();
788   }
789   exp += 1023; // Increment for 1023 bias
790 
791   // Number of bits in mantissa is 52. To obtain the mantissa value, we must
792   // extract the high 52 bits from the correct words in pVal.
793   uint64_t mantissa;
794   unsigned hiWord = whichWord(n-1);
795   if (hiWord == 0) {
796     mantissa = Tmp.U.pVal[0];
797     if (n > 52)
798       mantissa >>= n - 52; // shift down, we want the top 52 bits.
799   } else {
800     assert(hiWord > 0 && "huh?");
801     uint64_t hibits = Tmp.U.pVal[hiWord] << (52 - n % APINT_BITS_PER_WORD);
802     uint64_t lobits = Tmp.U.pVal[hiWord-1] >> (11 + n % APINT_BITS_PER_WORD);
803     mantissa = hibits | lobits;
804   }
805 
806   // The leading bit of mantissa is implicit, so get rid of it.
807   uint64_t sign = isNeg ? (1ULL << (APINT_BITS_PER_WORD - 1)) : 0;
808   union {
809     double D;
810     uint64_t I;
811   } T;
812   T.I = sign | (exp << 52) | mantissa;
813   return T.D;
814 }
815 
816 // Truncate to new width.
817 APInt APInt::trunc(unsigned width) const {
818   assert(width < BitWidth && "Invalid APInt Truncate request");
819   assert(width && "Can't truncate to 0 bits");
820 
821   if (width <= APINT_BITS_PER_WORD)
822     return APInt(width, getRawData()[0]);
823 
824   APInt Result(getMemory(getNumWords(width)), width);
825 
826   // Copy full words.
827   unsigned i;
828   for (i = 0; i != width / APINT_BITS_PER_WORD; i++)
829     Result.U.pVal[i] = U.pVal[i];
830 
831   // Truncate and copy any partial word.
832   unsigned bits = (0 - width) % APINT_BITS_PER_WORD;
833   if (bits != 0)
834     Result.U.pVal[i] = U.pVal[i] << bits >> bits;
835 
836   return Result;
837 }
838 
839 // Sign extend to a new width.
840 APInt APInt::sext(unsigned Width) const {
841   assert(Width > BitWidth && "Invalid APInt SignExtend request");
842 
843   if (Width <= APINT_BITS_PER_WORD)
844     return APInt(Width, SignExtend64(U.VAL, BitWidth));
845 
846   APInt Result(getMemory(getNumWords(Width)), Width);
847 
848   // Copy words.
849   std::memcpy(Result.U.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE);
850 
851   // Sign extend the last word since there may be unused bits in the input.
852   Result.U.pVal[getNumWords() - 1] =
853       SignExtend64(Result.U.pVal[getNumWords() - 1],
854                    ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1);
855 
856   // Fill with sign bits.
857   std::memset(Result.U.pVal + getNumWords(), isNegative() ? -1 : 0,
858               (Result.getNumWords() - getNumWords()) * APINT_WORD_SIZE);
859   Result.clearUnusedBits();
860   return Result;
861 }
862 
863 //  Zero extend to a new width.
864 APInt APInt::zext(unsigned width) const {
865   assert(width > BitWidth && "Invalid APInt ZeroExtend request");
866 
867   if (width <= APINT_BITS_PER_WORD)
868     return APInt(width, U.VAL);
869 
870   APInt Result(getMemory(getNumWords(width)), width);
871 
872   // Copy words.
873   std::memcpy(Result.U.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE);
874 
875   // Zero remaining words.
876   std::memset(Result.U.pVal + getNumWords(), 0,
877               (Result.getNumWords() - getNumWords()) * APINT_WORD_SIZE);
878 
879   return Result;
880 }
881 
882 APInt APInt::zextOrTrunc(unsigned width) const {
883   if (BitWidth < width)
884     return zext(width);
885   if (BitWidth > width)
886     return trunc(width);
887   return *this;
888 }
889 
890 APInt APInt::sextOrTrunc(unsigned width) const {
891   if (BitWidth < width)
892     return sext(width);
893   if (BitWidth > width)
894     return trunc(width);
895   return *this;
896 }
897 
898 APInt APInt::zextOrSelf(unsigned width) const {
899   if (BitWidth < width)
900     return zext(width);
901   return *this;
902 }
903 
904 APInt APInt::sextOrSelf(unsigned width) const {
905   if (BitWidth < width)
906     return sext(width);
907   return *this;
908 }
909 
910 /// Arithmetic right-shift this APInt by shiftAmt.
911 /// Arithmetic right-shift function.
912 void APInt::ashrInPlace(const APInt &shiftAmt) {
913   ashrInPlace((unsigned)shiftAmt.getLimitedValue(BitWidth));
914 }
915 
916 /// Arithmetic right-shift this APInt by shiftAmt.
917 /// Arithmetic right-shift function.
918 void APInt::ashrSlowCase(unsigned ShiftAmt) {
919   // Don't bother performing a no-op shift.
920   if (!ShiftAmt)
921     return;
922 
923   // Save the original sign bit for later.
924   bool Negative = isNegative();
925 
926   // WordShift is the inter-part shift; BitShift is intra-part shift.
927   unsigned WordShift = ShiftAmt / APINT_BITS_PER_WORD;
928   unsigned BitShift = ShiftAmt % APINT_BITS_PER_WORD;
929 
930   unsigned WordsToMove = getNumWords() - WordShift;
931   if (WordsToMove != 0) {
932     // Sign extend the last word to fill in the unused bits.
933     U.pVal[getNumWords() - 1] = SignExtend64(
934         U.pVal[getNumWords() - 1], ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1);
935 
936     // Fastpath for moving by whole words.
937     if (BitShift == 0) {
938       std::memmove(U.pVal, U.pVal + WordShift, WordsToMove * APINT_WORD_SIZE);
939     } else {
940       // Move the words containing significant bits.
941       for (unsigned i = 0; i != WordsToMove - 1; ++i)
942         U.pVal[i] = (U.pVal[i + WordShift] >> BitShift) |
943                     (U.pVal[i + WordShift + 1] << (APINT_BITS_PER_WORD - BitShift));
944 
945       // Handle the last word which has no high bits to copy.
946       U.pVal[WordsToMove - 1] = U.pVal[WordShift + WordsToMove - 1] >> BitShift;
947       // Sign extend one more time.
948       U.pVal[WordsToMove - 1] =
949           SignExtend64(U.pVal[WordsToMove - 1], APINT_BITS_PER_WORD - BitShift);
950     }
951   }
952 
953   // Fill in the remainder based on the original sign.
954   std::memset(U.pVal + WordsToMove, Negative ? -1 : 0,
955               WordShift * APINT_WORD_SIZE);
956   clearUnusedBits();
957 }
958 
959 /// Logical right-shift this APInt by shiftAmt.
960 /// Logical right-shift function.
961 void APInt::lshrInPlace(const APInt &shiftAmt) {
962   lshrInPlace((unsigned)shiftAmt.getLimitedValue(BitWidth));
963 }
964 
965 /// Logical right-shift this APInt by shiftAmt.
966 /// Logical right-shift function.
967 void APInt::lshrSlowCase(unsigned ShiftAmt) {
968   tcShiftRight(U.pVal, getNumWords(), ShiftAmt);
969 }
970 
971 /// Left-shift this APInt by shiftAmt.
972 /// Left-shift function.
973 APInt &APInt::operator<<=(const APInt &shiftAmt) {
974   // It's undefined behavior in C to shift by BitWidth or greater.
975   *this <<= (unsigned)shiftAmt.getLimitedValue(BitWidth);
976   return *this;
977 }
978 
979 void APInt::shlSlowCase(unsigned ShiftAmt) {
980   tcShiftLeft(U.pVal, getNumWords(), ShiftAmt);
981   clearUnusedBits();
982 }
983 
984 // Calculate the rotate amount modulo the bit width.
985 static unsigned rotateModulo(unsigned BitWidth, const APInt &rotateAmt) {
986   unsigned rotBitWidth = rotateAmt.getBitWidth();
987   APInt rot = rotateAmt;
988   if (rotBitWidth < BitWidth) {
989     // Extend the rotate APInt, so that the urem doesn't divide by 0.
990     // e.g. APInt(1, 32) would give APInt(1, 0).
991     rot = rotateAmt.zext(BitWidth);
992   }
993   rot = rot.urem(APInt(rot.getBitWidth(), BitWidth));
994   return rot.getLimitedValue(BitWidth);
995 }
996 
997 APInt APInt::rotl(const APInt &rotateAmt) const {
998   return rotl(rotateModulo(BitWidth, rotateAmt));
999 }
1000 
1001 APInt APInt::rotl(unsigned rotateAmt) const {
1002   rotateAmt %= BitWidth;
1003   if (rotateAmt == 0)
1004     return *this;
1005   return shl(rotateAmt) | lshr(BitWidth - rotateAmt);
1006 }
1007 
1008 APInt APInt::rotr(const APInt &rotateAmt) const {
1009   return rotr(rotateModulo(BitWidth, rotateAmt));
1010 }
1011 
1012 APInt APInt::rotr(unsigned rotateAmt) const {
1013   rotateAmt %= BitWidth;
1014   if (rotateAmt == 0)
1015     return *this;
1016   return lshr(rotateAmt) | shl(BitWidth - rotateAmt);
1017 }
1018 
1019 // Square Root - this method computes and returns the square root of "this".
1020 // Three mechanisms are used for computation. For small values (<= 5 bits),
1021 // a table lookup is done. This gets some performance for common cases. For
1022 // values using less than 52 bits, the value is converted to double and then
1023 // the libc sqrt function is called. The result is rounded and then converted
1024 // back to a uint64_t which is then used to construct the result. Finally,
1025 // the Babylonian method for computing square roots is used.
1026 APInt APInt::sqrt() const {
1027 
1028   // Determine the magnitude of the value.
1029   unsigned magnitude = getActiveBits();
1030 
1031   // Use a fast table for some small values. This also gets rid of some
1032   // rounding errors in libc sqrt for small values.
1033   if (magnitude <= 5) {
1034     static const uint8_t results[32] = {
1035       /*     0 */ 0,
1036       /*  1- 2 */ 1, 1,
1037       /*  3- 6 */ 2, 2, 2, 2,
1038       /*  7-12 */ 3, 3, 3, 3, 3, 3,
1039       /* 13-20 */ 4, 4, 4, 4, 4, 4, 4, 4,
1040       /* 21-30 */ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1041       /*    31 */ 6
1042     };
1043     return APInt(BitWidth, results[ (isSingleWord() ? U.VAL : U.pVal[0]) ]);
1044   }
1045 
1046   // If the magnitude of the value fits in less than 52 bits (the precision of
1047   // an IEEE double precision floating point value), then we can use the
1048   // libc sqrt function which will probably use a hardware sqrt computation.
1049   // This should be faster than the algorithm below.
1050   if (magnitude < 52) {
1051     return APInt(BitWidth,
1052                  uint64_t(::round(::sqrt(double(isSingleWord() ? U.VAL
1053                                                                : U.pVal[0])))));
1054   }
1055 
1056   // Okay, all the short cuts are exhausted. We must compute it. The following
1057   // is a classical Babylonian method for computing the square root. This code
1058   // was adapted to APInt from a wikipedia article on such computations.
1059   // See http://www.wikipedia.org/ and go to the page named
1060   // Calculate_an_integer_square_root.
1061   unsigned nbits = BitWidth, i = 4;
1062   APInt testy(BitWidth, 16);
1063   APInt x_old(BitWidth, 1);
1064   APInt x_new(BitWidth, 0);
1065   APInt two(BitWidth, 2);
1066 
1067   // Select a good starting value using binary logarithms.
1068   for (;; i += 2, testy = testy.shl(2))
1069     if (i >= nbits || this->ule(testy)) {
1070       x_old = x_old.shl(i / 2);
1071       break;
1072     }
1073 
1074   // Use the Babylonian method to arrive at the integer square root:
1075   for (;;) {
1076     x_new = (this->udiv(x_old) + x_old).udiv(two);
1077     if (x_old.ule(x_new))
1078       break;
1079     x_old = x_new;
1080   }
1081 
1082   // Make sure we return the closest approximation
1083   // NOTE: The rounding calculation below is correct. It will produce an
1084   // off-by-one discrepancy with results from pari/gp. That discrepancy has been
1085   // determined to be a rounding issue with pari/gp as it begins to use a
1086   // floating point representation after 192 bits. There are no discrepancies
1087   // between this algorithm and pari/gp for bit widths < 192 bits.
1088   APInt square(x_old * x_old);
1089   APInt nextSquare((x_old + 1) * (x_old +1));
1090   if (this->ult(square))
1091     return x_old;
1092   assert(this->ule(nextSquare) && "Error in APInt::sqrt computation");
1093   APInt midpoint((nextSquare - square).udiv(two));
1094   APInt offset(*this - square);
1095   if (offset.ult(midpoint))
1096     return x_old;
1097   return x_old + 1;
1098 }
1099 
1100 /// Computes the multiplicative inverse of this APInt for a given modulo. The
1101 /// iterative extended Euclidean algorithm is used to solve for this value,
1102 /// however we simplify it to speed up calculating only the inverse, and take
1103 /// advantage of div+rem calculations. We also use some tricks to avoid copying
1104 /// (potentially large) APInts around.
1105 APInt APInt::multiplicativeInverse(const APInt& modulo) const {
1106   assert(ult(modulo) && "This APInt must be smaller than the modulo");
1107 
1108   // Using the properties listed at the following web page (accessed 06/21/08):
1109   //   http://www.numbertheory.org/php/euclid.html
1110   // (especially the properties numbered 3, 4 and 9) it can be proved that
1111   // BitWidth bits suffice for all the computations in the algorithm implemented
1112   // below. More precisely, this number of bits suffice if the multiplicative
1113   // inverse exists, but may not suffice for the general extended Euclidean
1114   // algorithm.
1115 
1116   APInt r[2] = { modulo, *this };
1117   APInt t[2] = { APInt(BitWidth, 0), APInt(BitWidth, 1) };
1118   APInt q(BitWidth, 0);
1119 
1120   unsigned i;
1121   for (i = 0; r[i^1] != 0; i ^= 1) {
1122     // An overview of the math without the confusing bit-flipping:
1123     // q = r[i-2] / r[i-1]
1124     // r[i] = r[i-2] % r[i-1]
1125     // t[i] = t[i-2] - t[i-1] * q
1126     udivrem(r[i], r[i^1], q, r[i]);
1127     t[i] -= t[i^1] * q;
1128   }
1129 
1130   // If this APInt and the modulo are not coprime, there is no multiplicative
1131   // inverse, so return 0. We check this by looking at the next-to-last
1132   // remainder, which is the gcd(*this,modulo) as calculated by the Euclidean
1133   // algorithm.
1134   if (r[i] != 1)
1135     return APInt(BitWidth, 0);
1136 
1137   // The next-to-last t is the multiplicative inverse.  However, we are
1138   // interested in a positive inverse. Calculate a positive one from a negative
1139   // one if necessary. A simple addition of the modulo suffices because
1140   // abs(t[i]) is known to be less than *this/2 (see the link above).
1141   if (t[i].isNegative())
1142     t[i] += modulo;
1143 
1144   return std::move(t[i]);
1145 }
1146 
1147 /// Calculate the magic numbers required to implement a signed integer division
1148 /// by a constant as a sequence of multiplies, adds and shifts.  Requires that
1149 /// the divisor not be 0, 1, or -1.  Taken from "Hacker's Delight", Henry S.
1150 /// Warren, Jr., chapter 10.
1151 APInt::ms APInt::magic() const {
1152   const APInt& d = *this;
1153   unsigned p;
1154   APInt ad, anc, delta, q1, r1, q2, r2, t;
1155   APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
1156   struct ms mag;
1157 
1158   ad = d.abs();
1159   t = signedMin + (d.lshr(d.getBitWidth() - 1));
1160   anc = t - 1 - t.urem(ad);   // absolute value of nc
1161   p = d.getBitWidth() - 1;    // initialize p
1162   q1 = signedMin.udiv(anc);   // initialize q1 = 2p/abs(nc)
1163   r1 = signedMin - q1*anc;    // initialize r1 = rem(2p,abs(nc))
1164   q2 = signedMin.udiv(ad);    // initialize q2 = 2p/abs(d)
1165   r2 = signedMin - q2*ad;     // initialize r2 = rem(2p,abs(d))
1166   do {
1167     p = p + 1;
1168     q1 = q1<<1;          // update q1 = 2p/abs(nc)
1169     r1 = r1<<1;          // update r1 = rem(2p/abs(nc))
1170     if (r1.uge(anc)) {  // must be unsigned comparison
1171       q1 = q1 + 1;
1172       r1 = r1 - anc;
1173     }
1174     q2 = q2<<1;          // update q2 = 2p/abs(d)
1175     r2 = r2<<1;          // update r2 = rem(2p/abs(d))
1176     if (r2.uge(ad)) {   // must be unsigned comparison
1177       q2 = q2 + 1;
1178       r2 = r2 - ad;
1179     }
1180     delta = ad - r2;
1181   } while (q1.ult(delta) || (q1 == delta && r1 == 0));
1182 
1183   mag.m = q2 + 1;
1184   if (d.isNegative()) mag.m = -mag.m;   // resulting magic number
1185   mag.s = p - d.getBitWidth();          // resulting shift
1186   return mag;
1187 }
1188 
1189 /// Calculate the magic numbers required to implement an unsigned integer
1190 /// division by a constant as a sequence of multiplies, adds and shifts.
1191 /// Requires that the divisor not be 0.  Taken from "Hacker's Delight", Henry
1192 /// S. Warren, Jr., chapter 10.
1193 /// LeadingZeros can be used to simplify the calculation if the upper bits
1194 /// of the divided value are known zero.
1195 APInt::mu APInt::magicu(unsigned LeadingZeros) const {
1196   const APInt& d = *this;
1197   unsigned p;
1198   APInt nc, delta, q1, r1, q2, r2;
1199   struct mu magu;
1200   magu.a = 0;               // initialize "add" indicator
1201   APInt allOnes = APInt::getAllOnesValue(d.getBitWidth()).lshr(LeadingZeros);
1202   APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
1203   APInt signedMax = APInt::getSignedMaxValue(d.getBitWidth());
1204 
1205   nc = allOnes - (allOnes - d).urem(d);
1206   p = d.getBitWidth() - 1;  // initialize p
1207   q1 = signedMin.udiv(nc);  // initialize q1 = 2p/nc
1208   r1 = signedMin - q1*nc;   // initialize r1 = rem(2p,nc)
1209   q2 = signedMax.udiv(d);   // initialize q2 = (2p-1)/d
1210   r2 = signedMax - q2*d;    // initialize r2 = rem((2p-1),d)
1211   do {
1212     p = p + 1;
1213     if (r1.uge(nc - r1)) {
1214       q1 = q1 + q1 + 1;  // update q1
1215       r1 = r1 + r1 - nc; // update r1
1216     }
1217     else {
1218       q1 = q1+q1; // update q1
1219       r1 = r1+r1; // update r1
1220     }
1221     if ((r2 + 1).uge(d - r2)) {
1222       if (q2.uge(signedMax)) magu.a = 1;
1223       q2 = q2+q2 + 1;     // update q2
1224       r2 = r2+r2 + 1 - d; // update r2
1225     }
1226     else {
1227       if (q2.uge(signedMin)) magu.a = 1;
1228       q2 = q2+q2;     // update q2
1229       r2 = r2+r2 + 1; // update r2
1230     }
1231     delta = d - 1 - r2;
1232   } while (p < d.getBitWidth()*2 &&
1233            (q1.ult(delta) || (q1 == delta && r1 == 0)));
1234   magu.m = q2 + 1; // resulting magic number
1235   magu.s = p - d.getBitWidth();  // resulting shift
1236   return magu;
1237 }
1238 
1239 /// Implementation of Knuth's Algorithm D (Division of nonnegative integers)
1240 /// from "Art of Computer Programming, Volume 2", section 4.3.1, p. 272. The
1241 /// variables here have the same names as in the algorithm. Comments explain
1242 /// the algorithm and any deviation from it.
1243 static void KnuthDiv(uint32_t *u, uint32_t *v, uint32_t *q, uint32_t* r,
1244                      unsigned m, unsigned n) {
1245   assert(u && "Must provide dividend");
1246   assert(v && "Must provide divisor");
1247   assert(q && "Must provide quotient");
1248   assert(u != v && u != q && v != q && "Must use different memory");
1249   assert(n>1 && "n must be > 1");
1250 
1251   // b denotes the base of the number system. In our case b is 2^32.
1252   const uint64_t b = uint64_t(1) << 32;
1253 
1254 // The DEBUG macros here tend to be spam in the debug output if you're not
1255 // debugging this code. Disable them unless KNUTH_DEBUG is defined.
1256 #pragma push_macro("LLVM_DEBUG")
1257 #ifndef KNUTH_DEBUG
1258 #undef LLVM_DEBUG
1259 #define LLVM_DEBUG(X)                                                          \
1260   do {                                                                         \
1261   } while (false)
1262 #endif
1263 
1264   LLVM_DEBUG(dbgs() << "KnuthDiv: m=" << m << " n=" << n << '\n');
1265   LLVM_DEBUG(dbgs() << "KnuthDiv: original:");
1266   LLVM_DEBUG(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1267   LLVM_DEBUG(dbgs() << " by");
1268   LLVM_DEBUG(for (int i = n; i > 0; i--) dbgs() << " " << v[i - 1]);
1269   LLVM_DEBUG(dbgs() << '\n');
1270   // D1. [Normalize.] Set d = b / (v[n-1] + 1) and multiply all the digits of
1271   // u and v by d. Note that we have taken Knuth's advice here to use a power
1272   // of 2 value for d such that d * v[n-1] >= b/2 (b is the base). A power of
1273   // 2 allows us to shift instead of multiply and it is easy to determine the
1274   // shift amount from the leading zeros.  We are basically normalizing the u
1275   // and v so that its high bits are shifted to the top of v's range without
1276   // overflow. Note that this can require an extra word in u so that u must
1277   // be of length m+n+1.
1278   unsigned shift = countLeadingZeros(v[n-1]);
1279   uint32_t v_carry = 0;
1280   uint32_t u_carry = 0;
1281   if (shift) {
1282     for (unsigned i = 0; i < m+n; ++i) {
1283       uint32_t u_tmp = u[i] >> (32 - shift);
1284       u[i] = (u[i] << shift) | u_carry;
1285       u_carry = u_tmp;
1286     }
1287     for (unsigned i = 0; i < n; ++i) {
1288       uint32_t v_tmp = v[i] >> (32 - shift);
1289       v[i] = (v[i] << shift) | v_carry;
1290       v_carry = v_tmp;
1291     }
1292   }
1293   u[m+n] = u_carry;
1294 
1295   LLVM_DEBUG(dbgs() << "KnuthDiv:   normal:");
1296   LLVM_DEBUG(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1297   LLVM_DEBUG(dbgs() << " by");
1298   LLVM_DEBUG(for (int i = n; i > 0; i--) dbgs() << " " << v[i - 1]);
1299   LLVM_DEBUG(dbgs() << '\n');
1300 
1301   // D2. [Initialize j.]  Set j to m. This is the loop counter over the places.
1302   int j = m;
1303   do {
1304     LLVM_DEBUG(dbgs() << "KnuthDiv: quotient digit #" << j << '\n');
1305     // D3. [Calculate q'.].
1306     //     Set qp = (u[j+n]*b + u[j+n-1]) / v[n-1]. (qp=qprime=q')
1307     //     Set rp = (u[j+n]*b + u[j+n-1]) % v[n-1]. (rp=rprime=r')
1308     // Now test if qp == b or qp*v[n-2] > b*rp + u[j+n-2]; if so, decrease
1309     // qp by 1, increase rp by v[n-1], and repeat this test if rp < b. The test
1310     // on v[n-2] determines at high speed most of the cases in which the trial
1311     // value qp is one too large, and it eliminates all cases where qp is two
1312     // too large.
1313     uint64_t dividend = Make_64(u[j+n], u[j+n-1]);
1314     LLVM_DEBUG(dbgs() << "KnuthDiv: dividend == " << dividend << '\n');
1315     uint64_t qp = dividend / v[n-1];
1316     uint64_t rp = dividend % v[n-1];
1317     if (qp == b || qp*v[n-2] > b*rp + u[j+n-2]) {
1318       qp--;
1319       rp += v[n-1];
1320       if (rp < b && (qp == b || qp*v[n-2] > b*rp + u[j+n-2]))
1321         qp--;
1322     }
1323     LLVM_DEBUG(dbgs() << "KnuthDiv: qp == " << qp << ", rp == " << rp << '\n');
1324 
1325     // D4. [Multiply and subtract.] Replace (u[j+n]u[j+n-1]...u[j]) with
1326     // (u[j+n]u[j+n-1]..u[j]) - qp * (v[n-1]...v[1]v[0]). This computation
1327     // consists of a simple multiplication by a one-place number, combined with
1328     // a subtraction.
1329     // The digits (u[j+n]...u[j]) should be kept positive; if the result of
1330     // this step is actually negative, (u[j+n]...u[j]) should be left as the
1331     // true value plus b**(n+1), namely as the b's complement of
1332     // the true value, and a "borrow" to the left should be remembered.
1333     int64_t borrow = 0;
1334     for (unsigned i = 0; i < n; ++i) {
1335       uint64_t p = uint64_t(qp) * uint64_t(v[i]);
1336       int64_t subres = int64_t(u[j+i]) - borrow - Lo_32(p);
1337       u[j+i] = Lo_32(subres);
1338       borrow = Hi_32(p) - Hi_32(subres);
1339       LLVM_DEBUG(dbgs() << "KnuthDiv: u[j+i] = " << u[j + i]
1340                         << ", borrow = " << borrow << '\n');
1341     }
1342     bool isNeg = u[j+n] < borrow;
1343     u[j+n] -= Lo_32(borrow);
1344 
1345     LLVM_DEBUG(dbgs() << "KnuthDiv: after subtraction:");
1346     LLVM_DEBUG(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1347     LLVM_DEBUG(dbgs() << '\n');
1348 
1349     // D5. [Test remainder.] Set q[j] = qp. If the result of step D4 was
1350     // negative, go to step D6; otherwise go on to step D7.
1351     q[j] = Lo_32(qp);
1352     if (isNeg) {
1353       // D6. [Add back]. The probability that this step is necessary is very
1354       // small, on the order of only 2/b. Make sure that test data accounts for
1355       // this possibility. Decrease q[j] by 1
1356       q[j]--;
1357       // and add (0v[n-1]...v[1]v[0]) to (u[j+n]u[j+n-1]...u[j+1]u[j]).
1358       // A carry will occur to the left of u[j+n], and it should be ignored
1359       // since it cancels with the borrow that occurred in D4.
1360       bool carry = false;
1361       for (unsigned i = 0; i < n; i++) {
1362         uint32_t limit = std::min(u[j+i],v[i]);
1363         u[j+i] += v[i] + carry;
1364         carry = u[j+i] < limit || (carry && u[j+i] == limit);
1365       }
1366       u[j+n] += carry;
1367     }
1368     LLVM_DEBUG(dbgs() << "KnuthDiv: after correction:");
1369     LLVM_DEBUG(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1370     LLVM_DEBUG(dbgs() << "\nKnuthDiv: digit result = " << q[j] << '\n');
1371 
1372     // D7. [Loop on j.]  Decrease j by one. Now if j >= 0, go back to D3.
1373   } while (--j >= 0);
1374 
1375   LLVM_DEBUG(dbgs() << "KnuthDiv: quotient:");
1376   LLVM_DEBUG(for (int i = m; i >= 0; i--) dbgs() << " " << q[i]);
1377   LLVM_DEBUG(dbgs() << '\n');
1378 
1379   // D8. [Unnormalize]. Now q[...] is the desired quotient, and the desired
1380   // remainder may be obtained by dividing u[...] by d. If r is non-null we
1381   // compute the remainder (urem uses this).
1382   if (r) {
1383     // The value d is expressed by the "shift" value above since we avoided
1384     // multiplication by d by using a shift left. So, all we have to do is
1385     // shift right here.
1386     if (shift) {
1387       uint32_t carry = 0;
1388       LLVM_DEBUG(dbgs() << "KnuthDiv: remainder:");
1389       for (int i = n-1; i >= 0; i--) {
1390         r[i] = (u[i] >> shift) | carry;
1391         carry = u[i] << (32 - shift);
1392         LLVM_DEBUG(dbgs() << " " << r[i]);
1393       }
1394     } else {
1395       for (int i = n-1; i >= 0; i--) {
1396         r[i] = u[i];
1397         LLVM_DEBUG(dbgs() << " " << r[i]);
1398       }
1399     }
1400     LLVM_DEBUG(dbgs() << '\n');
1401   }
1402   LLVM_DEBUG(dbgs() << '\n');
1403 
1404 #pragma pop_macro("LLVM_DEBUG")
1405 }
1406 
1407 void APInt::divide(const WordType *LHS, unsigned lhsWords, const WordType *RHS,
1408                    unsigned rhsWords, WordType *Quotient, WordType *Remainder) {
1409   assert(lhsWords >= rhsWords && "Fractional result");
1410 
1411   // First, compose the values into an array of 32-bit words instead of
1412   // 64-bit words. This is a necessity of both the "short division" algorithm
1413   // and the Knuth "classical algorithm" which requires there to be native
1414   // operations for +, -, and * on an m bit value with an m*2 bit result. We
1415   // can't use 64-bit operands here because we don't have native results of
1416   // 128-bits. Furthermore, casting the 64-bit values to 32-bit values won't
1417   // work on large-endian machines.
1418   unsigned n = rhsWords * 2;
1419   unsigned m = (lhsWords * 2) - n;
1420 
1421   // Allocate space for the temporary values we need either on the stack, if
1422   // it will fit, or on the heap if it won't.
1423   uint32_t SPACE[128];
1424   uint32_t *U = nullptr;
1425   uint32_t *V = nullptr;
1426   uint32_t *Q = nullptr;
1427   uint32_t *R = nullptr;
1428   if ((Remainder?4:3)*n+2*m+1 <= 128) {
1429     U = &SPACE[0];
1430     V = &SPACE[m+n+1];
1431     Q = &SPACE[(m+n+1) + n];
1432     if (Remainder)
1433       R = &SPACE[(m+n+1) + n + (m+n)];
1434   } else {
1435     U = new uint32_t[m + n + 1];
1436     V = new uint32_t[n];
1437     Q = new uint32_t[m+n];
1438     if (Remainder)
1439       R = new uint32_t[n];
1440   }
1441 
1442   // Initialize the dividend
1443   memset(U, 0, (m+n+1)*sizeof(uint32_t));
1444   for (unsigned i = 0; i < lhsWords; ++i) {
1445     uint64_t tmp = LHS[i];
1446     U[i * 2] = Lo_32(tmp);
1447     U[i * 2 + 1] = Hi_32(tmp);
1448   }
1449   U[m+n] = 0; // this extra word is for "spill" in the Knuth algorithm.
1450 
1451   // Initialize the divisor
1452   memset(V, 0, (n)*sizeof(uint32_t));
1453   for (unsigned i = 0; i < rhsWords; ++i) {
1454     uint64_t tmp = RHS[i];
1455     V[i * 2] = Lo_32(tmp);
1456     V[i * 2 + 1] = Hi_32(tmp);
1457   }
1458 
1459   // initialize the quotient and remainder
1460   memset(Q, 0, (m+n) * sizeof(uint32_t));
1461   if (Remainder)
1462     memset(R, 0, n * sizeof(uint32_t));
1463 
1464   // Now, adjust m and n for the Knuth division. n is the number of words in
1465   // the divisor. m is the number of words by which the dividend exceeds the
1466   // divisor (i.e. m+n is the length of the dividend). These sizes must not
1467   // contain any zero words or the Knuth algorithm fails.
1468   for (unsigned i = n; i > 0 && V[i-1] == 0; i--) {
1469     n--;
1470     m++;
1471   }
1472   for (unsigned i = m+n; i > 0 && U[i-1] == 0; i--)
1473     m--;
1474 
1475   // If we're left with only a single word for the divisor, Knuth doesn't work
1476   // so we implement the short division algorithm here. This is much simpler
1477   // and faster because we are certain that we can divide a 64-bit quantity
1478   // by a 32-bit quantity at hardware speed and short division is simply a
1479   // series of such operations. This is just like doing short division but we
1480   // are using base 2^32 instead of base 10.
1481   assert(n != 0 && "Divide by zero?");
1482   if (n == 1) {
1483     uint32_t divisor = V[0];
1484     uint32_t remainder = 0;
1485     for (int i = m; i >= 0; i--) {
1486       uint64_t partial_dividend = Make_64(remainder, U[i]);
1487       if (partial_dividend == 0) {
1488         Q[i] = 0;
1489         remainder = 0;
1490       } else if (partial_dividend < divisor) {
1491         Q[i] = 0;
1492         remainder = Lo_32(partial_dividend);
1493       } else if (partial_dividend == divisor) {
1494         Q[i] = 1;
1495         remainder = 0;
1496       } else {
1497         Q[i] = Lo_32(partial_dividend / divisor);
1498         remainder = Lo_32(partial_dividend - (Q[i] * divisor));
1499       }
1500     }
1501     if (R)
1502       R[0] = remainder;
1503   } else {
1504     // Now we're ready to invoke the Knuth classical divide algorithm. In this
1505     // case n > 1.
1506     KnuthDiv(U, V, Q, R, m, n);
1507   }
1508 
1509   // If the caller wants the quotient
1510   if (Quotient) {
1511     for (unsigned i = 0; i < lhsWords; ++i)
1512       Quotient[i] = Make_64(Q[i*2+1], Q[i*2]);
1513   }
1514 
1515   // If the caller wants the remainder
1516   if (Remainder) {
1517     for (unsigned i = 0; i < rhsWords; ++i)
1518       Remainder[i] = Make_64(R[i*2+1], R[i*2]);
1519   }
1520 
1521   // Clean up the memory we allocated.
1522   if (U != &SPACE[0]) {
1523     delete [] U;
1524     delete [] V;
1525     delete [] Q;
1526     delete [] R;
1527   }
1528 }
1529 
1530 APInt APInt::udiv(const APInt &RHS) const {
1531   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
1532 
1533   // First, deal with the easy case
1534   if (isSingleWord()) {
1535     assert(RHS.U.VAL != 0 && "Divide by zero?");
1536     return APInt(BitWidth, U.VAL / RHS.U.VAL);
1537   }
1538 
1539   // Get some facts about the LHS and RHS number of bits and words
1540   unsigned lhsWords = getNumWords(getActiveBits());
1541   unsigned rhsBits  = RHS.getActiveBits();
1542   unsigned rhsWords = getNumWords(rhsBits);
1543   assert(rhsWords && "Divided by zero???");
1544 
1545   // Deal with some degenerate cases
1546   if (!lhsWords)
1547     // 0 / X ===> 0
1548     return APInt(BitWidth, 0);
1549   if (rhsBits == 1)
1550     // X / 1 ===> X
1551     return *this;
1552   if (lhsWords < rhsWords || this->ult(RHS))
1553     // X / Y ===> 0, iff X < Y
1554     return APInt(BitWidth, 0);
1555   if (*this == RHS)
1556     // X / X ===> 1
1557     return APInt(BitWidth, 1);
1558   if (lhsWords == 1) // rhsWords is 1 if lhsWords is 1.
1559     // All high words are zero, just use native divide
1560     return APInt(BitWidth, this->U.pVal[0] / RHS.U.pVal[0]);
1561 
1562   // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1563   APInt Quotient(BitWidth, 0); // to hold result.
1564   divide(U.pVal, lhsWords, RHS.U.pVal, rhsWords, Quotient.U.pVal, nullptr);
1565   return Quotient;
1566 }
1567 
1568 APInt APInt::udiv(uint64_t RHS) const {
1569   assert(RHS != 0 && "Divide by zero?");
1570 
1571   // First, deal with the easy case
1572   if (isSingleWord())
1573     return APInt(BitWidth, U.VAL / RHS);
1574 
1575   // Get some facts about the LHS words.
1576   unsigned lhsWords = getNumWords(getActiveBits());
1577 
1578   // Deal with some degenerate cases
1579   if (!lhsWords)
1580     // 0 / X ===> 0
1581     return APInt(BitWidth, 0);
1582   if (RHS == 1)
1583     // X / 1 ===> X
1584     return *this;
1585   if (this->ult(RHS))
1586     // X / Y ===> 0, iff X < Y
1587     return APInt(BitWidth, 0);
1588   if (*this == RHS)
1589     // X / X ===> 1
1590     return APInt(BitWidth, 1);
1591   if (lhsWords == 1) // rhsWords is 1 if lhsWords is 1.
1592     // All high words are zero, just use native divide
1593     return APInt(BitWidth, this->U.pVal[0] / RHS);
1594 
1595   // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1596   APInt Quotient(BitWidth, 0); // to hold result.
1597   divide(U.pVal, lhsWords, &RHS, 1, Quotient.U.pVal, nullptr);
1598   return Quotient;
1599 }
1600 
1601 APInt APInt::sdiv(const APInt &RHS) const {
1602   if (isNegative()) {
1603     if (RHS.isNegative())
1604       return (-(*this)).udiv(-RHS);
1605     return -((-(*this)).udiv(RHS));
1606   }
1607   if (RHS.isNegative())
1608     return -(this->udiv(-RHS));
1609   return this->udiv(RHS);
1610 }
1611 
1612 APInt APInt::sdiv(int64_t RHS) const {
1613   if (isNegative()) {
1614     if (RHS < 0)
1615       return (-(*this)).udiv(-RHS);
1616     return -((-(*this)).udiv(RHS));
1617   }
1618   if (RHS < 0)
1619     return -(this->udiv(-RHS));
1620   return this->udiv(RHS);
1621 }
1622 
1623 APInt APInt::urem(const APInt &RHS) const {
1624   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
1625   if (isSingleWord()) {
1626     assert(RHS.U.VAL != 0 && "Remainder by zero?");
1627     return APInt(BitWidth, U.VAL % RHS.U.VAL);
1628   }
1629 
1630   // Get some facts about the LHS
1631   unsigned lhsWords = getNumWords(getActiveBits());
1632 
1633   // Get some facts about the RHS
1634   unsigned rhsBits = RHS.getActiveBits();
1635   unsigned rhsWords = getNumWords(rhsBits);
1636   assert(rhsWords && "Performing remainder operation by zero ???");
1637 
1638   // Check the degenerate cases
1639   if (lhsWords == 0)
1640     // 0 % Y ===> 0
1641     return APInt(BitWidth, 0);
1642   if (rhsBits == 1)
1643     // X % 1 ===> 0
1644     return APInt(BitWidth, 0);
1645   if (lhsWords < rhsWords || this->ult(RHS))
1646     // X % Y ===> X, iff X < Y
1647     return *this;
1648   if (*this == RHS)
1649     // X % X == 0;
1650     return APInt(BitWidth, 0);
1651   if (lhsWords == 1)
1652     // All high words are zero, just use native remainder
1653     return APInt(BitWidth, U.pVal[0] % RHS.U.pVal[0]);
1654 
1655   // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1656   APInt Remainder(BitWidth, 0);
1657   divide(U.pVal, lhsWords, RHS.U.pVal, rhsWords, nullptr, Remainder.U.pVal);
1658   return Remainder;
1659 }
1660 
1661 uint64_t APInt::urem(uint64_t RHS) const {
1662   assert(RHS != 0 && "Remainder by zero?");
1663 
1664   if (isSingleWord())
1665     return U.VAL % RHS;
1666 
1667   // Get some facts about the LHS
1668   unsigned lhsWords = getNumWords(getActiveBits());
1669 
1670   // Check the degenerate cases
1671   if (lhsWords == 0)
1672     // 0 % Y ===> 0
1673     return 0;
1674   if (RHS == 1)
1675     // X % 1 ===> 0
1676     return 0;
1677   if (this->ult(RHS))
1678     // X % Y ===> X, iff X < Y
1679     return getZExtValue();
1680   if (*this == RHS)
1681     // X % X == 0;
1682     return 0;
1683   if (lhsWords == 1)
1684     // All high words are zero, just use native remainder
1685     return U.pVal[0] % RHS;
1686 
1687   // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1688   uint64_t Remainder;
1689   divide(U.pVal, lhsWords, &RHS, 1, nullptr, &Remainder);
1690   return Remainder;
1691 }
1692 
1693 APInt APInt::srem(const APInt &RHS) const {
1694   if (isNegative()) {
1695     if (RHS.isNegative())
1696       return -((-(*this)).urem(-RHS));
1697     return -((-(*this)).urem(RHS));
1698   }
1699   if (RHS.isNegative())
1700     return this->urem(-RHS);
1701   return this->urem(RHS);
1702 }
1703 
1704 int64_t APInt::srem(int64_t RHS) const {
1705   if (isNegative()) {
1706     if (RHS < 0)
1707       return -((-(*this)).urem(-RHS));
1708     return -((-(*this)).urem(RHS));
1709   }
1710   if (RHS < 0)
1711     return this->urem(-RHS);
1712   return this->urem(RHS);
1713 }
1714 
1715 void APInt::udivrem(const APInt &LHS, const APInt &RHS,
1716                     APInt &Quotient, APInt &Remainder) {
1717   assert(LHS.BitWidth == RHS.BitWidth && "Bit widths must be the same");
1718   unsigned BitWidth = LHS.BitWidth;
1719 
1720   // First, deal with the easy case
1721   if (LHS.isSingleWord()) {
1722     assert(RHS.U.VAL != 0 && "Divide by zero?");
1723     uint64_t QuotVal = LHS.U.VAL / RHS.U.VAL;
1724     uint64_t RemVal = LHS.U.VAL % RHS.U.VAL;
1725     Quotient = APInt(BitWidth, QuotVal);
1726     Remainder = APInt(BitWidth, RemVal);
1727     return;
1728   }
1729 
1730   // Get some size facts about the dividend and divisor
1731   unsigned lhsWords = getNumWords(LHS.getActiveBits());
1732   unsigned rhsBits  = RHS.getActiveBits();
1733   unsigned rhsWords = getNumWords(rhsBits);
1734   assert(rhsWords && "Performing divrem operation by zero ???");
1735 
1736   // Check the degenerate cases
1737   if (lhsWords == 0) {
1738     Quotient = 0;                // 0 / Y ===> 0
1739     Remainder = 0;               // 0 % Y ===> 0
1740     return;
1741   }
1742 
1743   if (rhsBits == 1) {
1744     Quotient = LHS;             // X / 1 ===> X
1745     Remainder = 0;              // X % 1 ===> 0
1746   }
1747 
1748   if (lhsWords < rhsWords || LHS.ult(RHS)) {
1749     Remainder = LHS;            // X % Y ===> X, iff X < Y
1750     Quotient = 0;               // X / Y ===> 0, iff X < Y
1751     return;
1752   }
1753 
1754   if (LHS == RHS) {
1755     Quotient  = 1;              // X / X ===> 1
1756     Remainder = 0;              // X % X ===> 0;
1757     return;
1758   }
1759 
1760   // Make sure there is enough space to hold the results.
1761   // NOTE: This assumes that reallocate won't affect any bits if it doesn't
1762   // change the size. This is necessary if Quotient or Remainder is aliased
1763   // with LHS or RHS.
1764   Quotient.reallocate(BitWidth);
1765   Remainder.reallocate(BitWidth);
1766 
1767   if (lhsWords == 1) { // rhsWords is 1 if lhsWords is 1.
1768     // There is only one word to consider so use the native versions.
1769     uint64_t lhsValue = LHS.U.pVal[0];
1770     uint64_t rhsValue = RHS.U.pVal[0];
1771     Quotient = lhsValue / rhsValue;
1772     Remainder = lhsValue % rhsValue;
1773     return;
1774   }
1775 
1776   // Okay, lets do it the long way
1777   divide(LHS.U.pVal, lhsWords, RHS.U.pVal, rhsWords, Quotient.U.pVal,
1778          Remainder.U.pVal);
1779   // Clear the rest of the Quotient and Remainder.
1780   std::memset(Quotient.U.pVal + lhsWords, 0,
1781               (getNumWords(BitWidth) - lhsWords) * APINT_WORD_SIZE);
1782   std::memset(Remainder.U.pVal + rhsWords, 0,
1783               (getNumWords(BitWidth) - rhsWords) * APINT_WORD_SIZE);
1784 }
1785 
1786 void APInt::udivrem(const APInt &LHS, uint64_t RHS, APInt &Quotient,
1787                     uint64_t &Remainder) {
1788   assert(RHS != 0 && "Divide by zero?");
1789   unsigned BitWidth = LHS.BitWidth;
1790 
1791   // First, deal with the easy case
1792   if (LHS.isSingleWord()) {
1793     uint64_t QuotVal = LHS.U.VAL / RHS;
1794     Remainder = LHS.U.VAL % RHS;
1795     Quotient = APInt(BitWidth, QuotVal);
1796     return;
1797   }
1798 
1799   // Get some size facts about the dividend and divisor
1800   unsigned lhsWords = getNumWords(LHS.getActiveBits());
1801 
1802   // Check the degenerate cases
1803   if (lhsWords == 0) {
1804     Quotient = 0;                // 0 / Y ===> 0
1805     Remainder = 0;               // 0 % Y ===> 0
1806     return;
1807   }
1808 
1809   if (RHS == 1) {
1810     Quotient = LHS;             // X / 1 ===> X
1811     Remainder = 0;              // X % 1 ===> 0
1812   }
1813 
1814   if (LHS.ult(RHS)) {
1815     Remainder = LHS.getZExtValue(); // X % Y ===> X, iff X < Y
1816     Quotient = 0;                   // X / Y ===> 0, iff X < Y
1817     return;
1818   }
1819 
1820   if (LHS == RHS) {
1821     Quotient  = 1;              // X / X ===> 1
1822     Remainder = 0;              // X % X ===> 0;
1823     return;
1824   }
1825 
1826   // Make sure there is enough space to hold the results.
1827   // NOTE: This assumes that reallocate won't affect any bits if it doesn't
1828   // change the size. This is necessary if Quotient is aliased with LHS.
1829   Quotient.reallocate(BitWidth);
1830 
1831   if (lhsWords == 1) { // rhsWords is 1 if lhsWords is 1.
1832     // There is only one word to consider so use the native versions.
1833     uint64_t lhsValue = LHS.U.pVal[0];
1834     Quotient = lhsValue / RHS;
1835     Remainder = lhsValue % RHS;
1836     return;
1837   }
1838 
1839   // Okay, lets do it the long way
1840   divide(LHS.U.pVal, lhsWords, &RHS, 1, Quotient.U.pVal, &Remainder);
1841   // Clear the rest of the Quotient.
1842   std::memset(Quotient.U.pVal + lhsWords, 0,
1843               (getNumWords(BitWidth) - lhsWords) * APINT_WORD_SIZE);
1844 }
1845 
1846 void APInt::sdivrem(const APInt &LHS, const APInt &RHS,
1847                     APInt &Quotient, APInt &Remainder) {
1848   if (LHS.isNegative()) {
1849     if (RHS.isNegative())
1850       APInt::udivrem(-LHS, -RHS, Quotient, Remainder);
1851     else {
1852       APInt::udivrem(-LHS, RHS, Quotient, Remainder);
1853       Quotient.negate();
1854     }
1855     Remainder.negate();
1856   } else if (RHS.isNegative()) {
1857     APInt::udivrem(LHS, -RHS, Quotient, Remainder);
1858     Quotient.negate();
1859   } else {
1860     APInt::udivrem(LHS, RHS, Quotient, Remainder);
1861   }
1862 }
1863 
1864 void APInt::sdivrem(const APInt &LHS, int64_t RHS,
1865                     APInt &Quotient, int64_t &Remainder) {
1866   uint64_t R = Remainder;
1867   if (LHS.isNegative()) {
1868     if (RHS < 0)
1869       APInt::udivrem(-LHS, -RHS, Quotient, R);
1870     else {
1871       APInt::udivrem(-LHS, RHS, Quotient, R);
1872       Quotient.negate();
1873     }
1874     R = -R;
1875   } else if (RHS < 0) {
1876     APInt::udivrem(LHS, -RHS, Quotient, R);
1877     Quotient.negate();
1878   } else {
1879     APInt::udivrem(LHS, RHS, Quotient, R);
1880   }
1881   Remainder = R;
1882 }
1883 
1884 APInt APInt::sadd_ov(const APInt &RHS, bool &Overflow) const {
1885   APInt Res = *this+RHS;
1886   Overflow = isNonNegative() == RHS.isNonNegative() &&
1887              Res.isNonNegative() != isNonNegative();
1888   return Res;
1889 }
1890 
1891 APInt APInt::uadd_ov(const APInt &RHS, bool &Overflow) const {
1892   APInt Res = *this+RHS;
1893   Overflow = Res.ult(RHS);
1894   return Res;
1895 }
1896 
1897 APInt APInt::ssub_ov(const APInt &RHS, bool &Overflow) const {
1898   APInt Res = *this - RHS;
1899   Overflow = isNonNegative() != RHS.isNonNegative() &&
1900              Res.isNonNegative() != isNonNegative();
1901   return Res;
1902 }
1903 
1904 APInt APInt::usub_ov(const APInt &RHS, bool &Overflow) const {
1905   APInt Res = *this-RHS;
1906   Overflow = Res.ugt(*this);
1907   return Res;
1908 }
1909 
1910 APInt APInt::sdiv_ov(const APInt &RHS, bool &Overflow) const {
1911   // MININT/-1  -->  overflow.
1912   Overflow = isMinSignedValue() && RHS.isAllOnesValue();
1913   return sdiv(RHS);
1914 }
1915 
1916 APInt APInt::smul_ov(const APInt &RHS, bool &Overflow) const {
1917   APInt Res = *this * RHS;
1918 
1919   if (*this != 0 && RHS != 0)
1920     Overflow = Res.sdiv(RHS) != *this || Res.sdiv(*this) != RHS;
1921   else
1922     Overflow = false;
1923   return Res;
1924 }
1925 
1926 APInt APInt::umul_ov(const APInt &RHS, bool &Overflow) const {
1927   APInt Res = *this * RHS;
1928 
1929   if (*this != 0 && RHS != 0)
1930     Overflow = Res.udiv(RHS) != *this || Res.udiv(*this) != RHS;
1931   else
1932     Overflow = false;
1933   return Res;
1934 }
1935 
1936 APInt APInt::sshl_ov(const APInt &ShAmt, bool &Overflow) const {
1937   Overflow = ShAmt.uge(getBitWidth());
1938   if (Overflow)
1939     return APInt(BitWidth, 0);
1940 
1941   if (isNonNegative()) // Don't allow sign change.
1942     Overflow = ShAmt.uge(countLeadingZeros());
1943   else
1944     Overflow = ShAmt.uge(countLeadingOnes());
1945 
1946   return *this << ShAmt;
1947 }
1948 
1949 APInt APInt::ushl_ov(const APInt &ShAmt, bool &Overflow) const {
1950   Overflow = ShAmt.uge(getBitWidth());
1951   if (Overflow)
1952     return APInt(BitWidth, 0);
1953 
1954   Overflow = ShAmt.ugt(countLeadingZeros());
1955 
1956   return *this << ShAmt;
1957 }
1958 
1959 
1960 
1961 
1962 void APInt::fromString(unsigned numbits, StringRef str, uint8_t radix) {
1963   // Check our assumptions here
1964   assert(!str.empty() && "Invalid string length");
1965   assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 ||
1966           radix == 36) &&
1967          "Radix should be 2, 8, 10, 16, or 36!");
1968 
1969   StringRef::iterator p = str.begin();
1970   size_t slen = str.size();
1971   bool isNeg = *p == '-';
1972   if (*p == '-' || *p == '+') {
1973     p++;
1974     slen--;
1975     assert(slen && "String is only a sign, needs a value.");
1976   }
1977   assert((slen <= numbits || radix != 2) && "Insufficient bit width");
1978   assert(((slen-1)*3 <= numbits || radix != 8) && "Insufficient bit width");
1979   assert(((slen-1)*4 <= numbits || radix != 16) && "Insufficient bit width");
1980   assert((((slen-1)*64)/22 <= numbits || radix != 10) &&
1981          "Insufficient bit width");
1982 
1983   // Allocate memory if needed
1984   if (isSingleWord())
1985     U.VAL = 0;
1986   else
1987     U.pVal = getClearedMemory(getNumWords());
1988 
1989   // Figure out if we can shift instead of multiply
1990   unsigned shift = (radix == 16 ? 4 : radix == 8 ? 3 : radix == 2 ? 1 : 0);
1991 
1992   // Enter digit traversal loop
1993   for (StringRef::iterator e = str.end(); p != e; ++p) {
1994     unsigned digit = getDigit(*p, radix);
1995     assert(digit < radix && "Invalid character in digit string");
1996 
1997     // Shift or multiply the value by the radix
1998     if (slen > 1) {
1999       if (shift)
2000         *this <<= shift;
2001       else
2002         *this *= radix;
2003     }
2004 
2005     // Add in the digit we just interpreted
2006     *this += digit;
2007   }
2008   // If its negative, put it in two's complement form
2009   if (isNeg)
2010     this->negate();
2011 }
2012 
2013 void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix,
2014                      bool Signed, bool formatAsCLiteral) const {
2015   assert((Radix == 10 || Radix == 8 || Radix == 16 || Radix == 2 ||
2016           Radix == 36) &&
2017          "Radix should be 2, 8, 10, 16, or 36!");
2018 
2019   const char *Prefix = "";
2020   if (formatAsCLiteral) {
2021     switch (Radix) {
2022       case 2:
2023         // Binary literals are a non-standard extension added in gcc 4.3:
2024         // http://gcc.gnu.org/onlinedocs/gcc-4.3.0/gcc/Binary-constants.html
2025         Prefix = "0b";
2026         break;
2027       case 8:
2028         Prefix = "0";
2029         break;
2030       case 10:
2031         break; // No prefix
2032       case 16:
2033         Prefix = "0x";
2034         break;
2035       default:
2036         llvm_unreachable("Invalid radix!");
2037     }
2038   }
2039 
2040   // First, check for a zero value and just short circuit the logic below.
2041   if (*this == 0) {
2042     while (*Prefix) {
2043       Str.push_back(*Prefix);
2044       ++Prefix;
2045     };
2046     Str.push_back('0');
2047     return;
2048   }
2049 
2050   static const char Digits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
2051 
2052   if (isSingleWord()) {
2053     char Buffer[65];
2054     char *BufPtr = std::end(Buffer);
2055 
2056     uint64_t N;
2057     if (!Signed) {
2058       N = getZExtValue();
2059     } else {
2060       int64_t I = getSExtValue();
2061       if (I >= 0) {
2062         N = I;
2063       } else {
2064         Str.push_back('-');
2065         N = -(uint64_t)I;
2066       }
2067     }
2068 
2069     while (*Prefix) {
2070       Str.push_back(*Prefix);
2071       ++Prefix;
2072     };
2073 
2074     while (N) {
2075       *--BufPtr = Digits[N % Radix];
2076       N /= Radix;
2077     }
2078     Str.append(BufPtr, std::end(Buffer));
2079     return;
2080   }
2081 
2082   APInt Tmp(*this);
2083 
2084   if (Signed && isNegative()) {
2085     // They want to print the signed version and it is a negative value
2086     // Flip the bits and add one to turn it into the equivalent positive
2087     // value and put a '-' in the result.
2088     Tmp.negate();
2089     Str.push_back('-');
2090   }
2091 
2092   while (*Prefix) {
2093     Str.push_back(*Prefix);
2094     ++Prefix;
2095   };
2096 
2097   // We insert the digits backward, then reverse them to get the right order.
2098   unsigned StartDig = Str.size();
2099 
2100   // For the 2, 8 and 16 bit cases, we can just shift instead of divide
2101   // because the number of bits per digit (1, 3 and 4 respectively) divides
2102   // equally.  We just shift until the value is zero.
2103   if (Radix == 2 || Radix == 8 || Radix == 16) {
2104     // Just shift tmp right for each digit width until it becomes zero
2105     unsigned ShiftAmt = (Radix == 16 ? 4 : (Radix == 8 ? 3 : 1));
2106     unsigned MaskAmt = Radix - 1;
2107 
2108     while (Tmp.getBoolValue()) {
2109       unsigned Digit = unsigned(Tmp.getRawData()[0]) & MaskAmt;
2110       Str.push_back(Digits[Digit]);
2111       Tmp.lshrInPlace(ShiftAmt);
2112     }
2113   } else {
2114     while (Tmp.getBoolValue()) {
2115       uint64_t Digit;
2116       udivrem(Tmp, Radix, Tmp, Digit);
2117       assert(Digit < Radix && "divide failed");
2118       Str.push_back(Digits[Digit]);
2119     }
2120   }
2121 
2122   // Reverse the digits before returning.
2123   std::reverse(Str.begin()+StartDig, Str.end());
2124 }
2125 
2126 /// Returns the APInt as a std::string. Note that this is an inefficient method.
2127 /// It is better to pass in a SmallVector/SmallString to the methods above.
2128 std::string APInt::toString(unsigned Radix = 10, bool Signed = true) const {
2129   SmallString<40> S;
2130   toString(S, Radix, Signed, /* formatAsCLiteral = */false);
2131   return S.str();
2132 }
2133 
2134 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2135 LLVM_DUMP_METHOD void APInt::dump() const {
2136   SmallString<40> S, U;
2137   this->toStringUnsigned(U);
2138   this->toStringSigned(S);
2139   dbgs() << "APInt(" << BitWidth << "b, "
2140          << U << "u " << S << "s)\n";
2141 }
2142 #endif
2143 
2144 void APInt::print(raw_ostream &OS, bool isSigned) const {
2145   SmallString<40> S;
2146   this->toString(S, 10, isSigned, /* formatAsCLiteral = */false);
2147   OS << S;
2148 }
2149 
2150 // This implements a variety of operations on a representation of
2151 // arbitrary precision, two's-complement, bignum integer values.
2152 
2153 // Assumed by lowHalf, highHalf, partMSB and partLSB.  A fairly safe
2154 // and unrestricting assumption.
2155 static_assert(APInt::APINT_BITS_PER_WORD % 2 == 0,
2156               "Part width must be divisible by 2!");
2157 
2158 /* Some handy functions local to this file.  */
2159 
2160 /* Returns the integer part with the least significant BITS set.
2161    BITS cannot be zero.  */
2162 static inline APInt::WordType lowBitMask(unsigned bits) {
2163   assert(bits != 0 && bits <= APInt::APINT_BITS_PER_WORD);
2164 
2165   return ~(APInt::WordType) 0 >> (APInt::APINT_BITS_PER_WORD - bits);
2166 }
2167 
2168 /* Returns the value of the lower half of PART.  */
2169 static inline APInt::WordType lowHalf(APInt::WordType part) {
2170   return part & lowBitMask(APInt::APINT_BITS_PER_WORD / 2);
2171 }
2172 
2173 /* Returns the value of the upper half of PART.  */
2174 static inline APInt::WordType highHalf(APInt::WordType part) {
2175   return part >> (APInt::APINT_BITS_PER_WORD / 2);
2176 }
2177 
2178 /* Returns the bit number of the most significant set bit of a part.
2179    If the input number has no bits set -1U is returned.  */
2180 static unsigned partMSB(APInt::WordType value) {
2181   return findLastSet(value, ZB_Max);
2182 }
2183 
2184 /* Returns the bit number of the least significant set bit of a
2185    part.  If the input number has no bits set -1U is returned.  */
2186 static unsigned partLSB(APInt::WordType value) {
2187   return findFirstSet(value, ZB_Max);
2188 }
2189 
2190 /* Sets the least significant part of a bignum to the input value, and
2191    zeroes out higher parts.  */
2192 void APInt::tcSet(WordType *dst, WordType part, unsigned parts) {
2193   assert(parts > 0);
2194 
2195   dst[0] = part;
2196   for (unsigned i = 1; i < parts; i++)
2197     dst[i] = 0;
2198 }
2199 
2200 /* Assign one bignum to another.  */
2201 void APInt::tcAssign(WordType *dst, const WordType *src, unsigned parts) {
2202   for (unsigned i = 0; i < parts; i++)
2203     dst[i] = src[i];
2204 }
2205 
2206 /* Returns true if a bignum is zero, false otherwise.  */
2207 bool APInt::tcIsZero(const WordType *src, unsigned parts) {
2208   for (unsigned i = 0; i < parts; i++)
2209     if (src[i])
2210       return false;
2211 
2212   return true;
2213 }
2214 
2215 /* Extract the given bit of a bignum; returns 0 or 1.  */
2216 int APInt::tcExtractBit(const WordType *parts, unsigned bit) {
2217   return (parts[whichWord(bit)] & maskBit(bit)) != 0;
2218 }
2219 
2220 /* Set the given bit of a bignum. */
2221 void APInt::tcSetBit(WordType *parts, unsigned bit) {
2222   parts[whichWord(bit)] |= maskBit(bit);
2223 }
2224 
2225 /* Clears the given bit of a bignum. */
2226 void APInt::tcClearBit(WordType *parts, unsigned bit) {
2227   parts[whichWord(bit)] &= ~maskBit(bit);
2228 }
2229 
2230 /* Returns the bit number of the least significant set bit of a
2231    number.  If the input number has no bits set -1U is returned.  */
2232 unsigned APInt::tcLSB(const WordType *parts, unsigned n) {
2233   for (unsigned i = 0; i < n; i++) {
2234     if (parts[i] != 0) {
2235       unsigned lsb = partLSB(parts[i]);
2236 
2237       return lsb + i * APINT_BITS_PER_WORD;
2238     }
2239   }
2240 
2241   return -1U;
2242 }
2243 
2244 /* Returns the bit number of the most significant set bit of a number.
2245    If the input number has no bits set -1U is returned.  */
2246 unsigned APInt::tcMSB(const WordType *parts, unsigned n) {
2247   do {
2248     --n;
2249 
2250     if (parts[n] != 0) {
2251       unsigned msb = partMSB(parts[n]);
2252 
2253       return msb + n * APINT_BITS_PER_WORD;
2254     }
2255   } while (n);
2256 
2257   return -1U;
2258 }
2259 
2260 /* Copy the bit vector of width srcBITS from SRC, starting at bit
2261    srcLSB, to DST, of dstCOUNT parts, such that the bit srcLSB becomes
2262    the least significant bit of DST.  All high bits above srcBITS in
2263    DST are zero-filled.  */
2264 void
2265 APInt::tcExtract(WordType *dst, unsigned dstCount, const WordType *src,
2266                  unsigned srcBits, unsigned srcLSB) {
2267   unsigned dstParts = (srcBits + APINT_BITS_PER_WORD - 1) / APINT_BITS_PER_WORD;
2268   assert(dstParts <= dstCount);
2269 
2270   unsigned firstSrcPart = srcLSB / APINT_BITS_PER_WORD;
2271   tcAssign (dst, src + firstSrcPart, dstParts);
2272 
2273   unsigned shift = srcLSB % APINT_BITS_PER_WORD;
2274   tcShiftRight (dst, dstParts, shift);
2275 
2276   /* We now have (dstParts * APINT_BITS_PER_WORD - shift) bits from SRC
2277      in DST.  If this is less that srcBits, append the rest, else
2278      clear the high bits.  */
2279   unsigned n = dstParts * APINT_BITS_PER_WORD - shift;
2280   if (n < srcBits) {
2281     WordType mask = lowBitMask (srcBits - n);
2282     dst[dstParts - 1] |= ((src[firstSrcPart + dstParts] & mask)
2283                           << n % APINT_BITS_PER_WORD);
2284   } else if (n > srcBits) {
2285     if (srcBits % APINT_BITS_PER_WORD)
2286       dst[dstParts - 1] &= lowBitMask (srcBits % APINT_BITS_PER_WORD);
2287   }
2288 
2289   /* Clear high parts.  */
2290   while (dstParts < dstCount)
2291     dst[dstParts++] = 0;
2292 }
2293 
2294 /* DST += RHS + C where C is zero or one.  Returns the carry flag.  */
2295 APInt::WordType APInt::tcAdd(WordType *dst, const WordType *rhs,
2296                              WordType c, unsigned parts) {
2297   assert(c <= 1);
2298 
2299   for (unsigned i = 0; i < parts; i++) {
2300     WordType l = dst[i];
2301     if (c) {
2302       dst[i] += rhs[i] + 1;
2303       c = (dst[i] <= l);
2304     } else {
2305       dst[i] += rhs[i];
2306       c = (dst[i] < l);
2307     }
2308   }
2309 
2310   return c;
2311 }
2312 
2313 /// This function adds a single "word" integer, src, to the multiple
2314 /// "word" integer array, dst[]. dst[] is modified to reflect the addition and
2315 /// 1 is returned if there is a carry out, otherwise 0 is returned.
2316 /// @returns the carry of the addition.
2317 APInt::WordType APInt::tcAddPart(WordType *dst, WordType src,
2318                                  unsigned parts) {
2319   for (unsigned i = 0; i < parts; ++i) {
2320     dst[i] += src;
2321     if (dst[i] >= src)
2322       return 0; // No need to carry so exit early.
2323     src = 1; // Carry one to next digit.
2324   }
2325 
2326   return 1;
2327 }
2328 
2329 /* DST -= RHS + C where C is zero or one.  Returns the carry flag.  */
2330 APInt::WordType APInt::tcSubtract(WordType *dst, const WordType *rhs,
2331                                   WordType c, unsigned parts) {
2332   assert(c <= 1);
2333 
2334   for (unsigned i = 0; i < parts; i++) {
2335     WordType l = dst[i];
2336     if (c) {
2337       dst[i] -= rhs[i] + 1;
2338       c = (dst[i] >= l);
2339     } else {
2340       dst[i] -= rhs[i];
2341       c = (dst[i] > l);
2342     }
2343   }
2344 
2345   return c;
2346 }
2347 
2348 /// This function subtracts a single "word" (64-bit word), src, from
2349 /// the multi-word integer array, dst[], propagating the borrowed 1 value until
2350 /// no further borrowing is needed or it runs out of "words" in dst.  The result
2351 /// is 1 if "borrowing" exhausted the digits in dst, or 0 if dst was not
2352 /// exhausted. In other words, if src > dst then this function returns 1,
2353 /// otherwise 0.
2354 /// @returns the borrow out of the subtraction
2355 APInt::WordType APInt::tcSubtractPart(WordType *dst, WordType src,
2356                                       unsigned parts) {
2357   for (unsigned i = 0; i < parts; ++i) {
2358     WordType Dst = dst[i];
2359     dst[i] -= src;
2360     if (src <= Dst)
2361       return 0; // No need to borrow so exit early.
2362     src = 1; // We have to "borrow 1" from next "word"
2363   }
2364 
2365   return 1;
2366 }
2367 
2368 /* Negate a bignum in-place.  */
2369 void APInt::tcNegate(WordType *dst, unsigned parts) {
2370   tcComplement(dst, parts);
2371   tcIncrement(dst, parts);
2372 }
2373 
2374 /*  DST += SRC * MULTIPLIER + CARRY   if add is true
2375     DST  = SRC * MULTIPLIER + CARRY   if add is false
2376 
2377     Requires 0 <= DSTPARTS <= SRCPARTS + 1.  If DST overlaps SRC
2378     they must start at the same point, i.e. DST == SRC.
2379 
2380     If DSTPARTS == SRCPARTS + 1 no overflow occurs and zero is
2381     returned.  Otherwise DST is filled with the least significant
2382     DSTPARTS parts of the result, and if all of the omitted higher
2383     parts were zero return zero, otherwise overflow occurred and
2384     return one.  */
2385 int APInt::tcMultiplyPart(WordType *dst, const WordType *src,
2386                           WordType multiplier, WordType carry,
2387                           unsigned srcParts, unsigned dstParts,
2388                           bool add) {
2389   /* Otherwise our writes of DST kill our later reads of SRC.  */
2390   assert(dst <= src || dst >= src + srcParts);
2391   assert(dstParts <= srcParts + 1);
2392 
2393   /* N loops; minimum of dstParts and srcParts.  */
2394   unsigned n = std::min(dstParts, srcParts);
2395 
2396   for (unsigned i = 0; i < n; i++) {
2397     WordType low, mid, high, srcPart;
2398 
2399       /* [ LOW, HIGH ] = MULTIPLIER * SRC[i] + DST[i] + CARRY.
2400 
2401          This cannot overflow, because
2402 
2403          (n - 1) * (n - 1) + 2 (n - 1) = (n - 1) * (n + 1)
2404 
2405          which is less than n^2.  */
2406 
2407     srcPart = src[i];
2408 
2409     if (multiplier == 0 || srcPart == 0) {
2410       low = carry;
2411       high = 0;
2412     } else {
2413       low = lowHalf(srcPart) * lowHalf(multiplier);
2414       high = highHalf(srcPart) * highHalf(multiplier);
2415 
2416       mid = lowHalf(srcPart) * highHalf(multiplier);
2417       high += highHalf(mid);
2418       mid <<= APINT_BITS_PER_WORD / 2;
2419       if (low + mid < low)
2420         high++;
2421       low += mid;
2422 
2423       mid = highHalf(srcPart) * lowHalf(multiplier);
2424       high += highHalf(mid);
2425       mid <<= APINT_BITS_PER_WORD / 2;
2426       if (low + mid < low)
2427         high++;
2428       low += mid;
2429 
2430       /* Now add carry.  */
2431       if (low + carry < low)
2432         high++;
2433       low += carry;
2434     }
2435 
2436     if (add) {
2437       /* And now DST[i], and store the new low part there.  */
2438       if (low + dst[i] < low)
2439         high++;
2440       dst[i] += low;
2441     } else
2442       dst[i] = low;
2443 
2444     carry = high;
2445   }
2446 
2447   if (srcParts < dstParts) {
2448     /* Full multiplication, there is no overflow.  */
2449     assert(srcParts + 1 == dstParts);
2450     dst[srcParts] = carry;
2451     return 0;
2452   }
2453 
2454   /* We overflowed if there is carry.  */
2455   if (carry)
2456     return 1;
2457 
2458   /* We would overflow if any significant unwritten parts would be
2459      non-zero.  This is true if any remaining src parts are non-zero
2460      and the multiplier is non-zero.  */
2461   if (multiplier)
2462     for (unsigned i = dstParts; i < srcParts; i++)
2463       if (src[i])
2464         return 1;
2465 
2466   /* We fitted in the narrow destination.  */
2467   return 0;
2468 }
2469 
2470 /* DST = LHS * RHS, where DST has the same width as the operands and
2471    is filled with the least significant parts of the result.  Returns
2472    one if overflow occurred, otherwise zero.  DST must be disjoint
2473    from both operands.  */
2474 int APInt::tcMultiply(WordType *dst, const WordType *lhs,
2475                       const WordType *rhs, unsigned parts) {
2476   assert(dst != lhs && dst != rhs);
2477 
2478   int overflow = 0;
2479   tcSet(dst, 0, parts);
2480 
2481   for (unsigned i = 0; i < parts; i++)
2482     overflow |= tcMultiplyPart(&dst[i], lhs, rhs[i], 0, parts,
2483                                parts - i, true);
2484 
2485   return overflow;
2486 }
2487 
2488 /// DST = LHS * RHS, where DST has width the sum of the widths of the
2489 /// operands. No overflow occurs. DST must be disjoint from both operands.
2490 void APInt::tcFullMultiply(WordType *dst, const WordType *lhs,
2491                            const WordType *rhs, unsigned lhsParts,
2492                            unsigned rhsParts) {
2493   /* Put the narrower number on the LHS for less loops below.  */
2494   if (lhsParts > rhsParts)
2495     return tcFullMultiply (dst, rhs, lhs, rhsParts, lhsParts);
2496 
2497   assert(dst != lhs && dst != rhs);
2498 
2499   tcSet(dst, 0, rhsParts);
2500 
2501   for (unsigned i = 0; i < lhsParts; i++)
2502     tcMultiplyPart(&dst[i], rhs, lhs[i], 0, rhsParts, rhsParts + 1, true);
2503 }
2504 
2505 /* If RHS is zero LHS and REMAINDER are left unchanged, return one.
2506    Otherwise set LHS to LHS / RHS with the fractional part discarded,
2507    set REMAINDER to the remainder, return zero.  i.e.
2508 
2509    OLD_LHS = RHS * LHS + REMAINDER
2510 
2511    SCRATCH is a bignum of the same size as the operands and result for
2512    use by the routine; its contents need not be initialized and are
2513    destroyed.  LHS, REMAINDER and SCRATCH must be distinct.
2514 */
2515 int APInt::tcDivide(WordType *lhs, const WordType *rhs,
2516                     WordType *remainder, WordType *srhs,
2517                     unsigned parts) {
2518   assert(lhs != remainder && lhs != srhs && remainder != srhs);
2519 
2520   unsigned shiftCount = tcMSB(rhs, parts) + 1;
2521   if (shiftCount == 0)
2522     return true;
2523 
2524   shiftCount = parts * APINT_BITS_PER_WORD - shiftCount;
2525   unsigned n = shiftCount / APINT_BITS_PER_WORD;
2526   WordType mask = (WordType) 1 << (shiftCount % APINT_BITS_PER_WORD);
2527 
2528   tcAssign(srhs, rhs, parts);
2529   tcShiftLeft(srhs, parts, shiftCount);
2530   tcAssign(remainder, lhs, parts);
2531   tcSet(lhs, 0, parts);
2532 
2533   /* Loop, subtracting SRHS if REMAINDER is greater and adding that to
2534      the total.  */
2535   for (;;) {
2536     int compare = tcCompare(remainder, srhs, parts);
2537     if (compare >= 0) {
2538       tcSubtract(remainder, srhs, 0, parts);
2539       lhs[n] |= mask;
2540     }
2541 
2542     if (shiftCount == 0)
2543       break;
2544     shiftCount--;
2545     tcShiftRight(srhs, parts, 1);
2546     if ((mask >>= 1) == 0) {
2547       mask = (WordType) 1 << (APINT_BITS_PER_WORD - 1);
2548       n--;
2549     }
2550   }
2551 
2552   return false;
2553 }
2554 
2555 /// Shift a bignum left Cound bits in-place. Shifted in bits are zero. There are
2556 /// no restrictions on Count.
2557 void APInt::tcShiftLeft(WordType *Dst, unsigned Words, unsigned Count) {
2558   // Don't bother performing a no-op shift.
2559   if (!Count)
2560     return;
2561 
2562   // WordShift is the inter-part shift; BitShift is the intra-part shift.
2563   unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words);
2564   unsigned BitShift = Count % APINT_BITS_PER_WORD;
2565 
2566   // Fastpath for moving by whole words.
2567   if (BitShift == 0) {
2568     std::memmove(Dst + WordShift, Dst, (Words - WordShift) * APINT_WORD_SIZE);
2569   } else {
2570     while (Words-- > WordShift) {
2571       Dst[Words] = Dst[Words - WordShift] << BitShift;
2572       if (Words > WordShift)
2573         Dst[Words] |=
2574           Dst[Words - WordShift - 1] >> (APINT_BITS_PER_WORD - BitShift);
2575     }
2576   }
2577 
2578   // Fill in the remainder with 0s.
2579   std::memset(Dst, 0, WordShift * APINT_WORD_SIZE);
2580 }
2581 
2582 /// Shift a bignum right Count bits in-place. Shifted in bits are zero. There
2583 /// are no restrictions on Count.
2584 void APInt::tcShiftRight(WordType *Dst, unsigned Words, unsigned Count) {
2585   // Don't bother performing a no-op shift.
2586   if (!Count)
2587     return;
2588 
2589   // WordShift is the inter-part shift; BitShift is the intra-part shift.
2590   unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words);
2591   unsigned BitShift = Count % APINT_BITS_PER_WORD;
2592 
2593   unsigned WordsToMove = Words - WordShift;
2594   // Fastpath for moving by whole words.
2595   if (BitShift == 0) {
2596     std::memmove(Dst, Dst + WordShift, WordsToMove * APINT_WORD_SIZE);
2597   } else {
2598     for (unsigned i = 0; i != WordsToMove; ++i) {
2599       Dst[i] = Dst[i + WordShift] >> BitShift;
2600       if (i + 1 != WordsToMove)
2601         Dst[i] |= Dst[i + WordShift + 1] << (APINT_BITS_PER_WORD - BitShift);
2602     }
2603   }
2604 
2605   // Fill in the remainder with 0s.
2606   std::memset(Dst + WordsToMove, 0, WordShift * APINT_WORD_SIZE);
2607 }
2608 
2609 /* Bitwise and of two bignums.  */
2610 void APInt::tcAnd(WordType *dst, const WordType *rhs, unsigned parts) {
2611   for (unsigned i = 0; i < parts; i++)
2612     dst[i] &= rhs[i];
2613 }
2614 
2615 /* Bitwise inclusive or of two bignums.  */
2616 void APInt::tcOr(WordType *dst, const WordType *rhs, unsigned parts) {
2617   for (unsigned i = 0; i < parts; i++)
2618     dst[i] |= rhs[i];
2619 }
2620 
2621 /* Bitwise exclusive or of two bignums.  */
2622 void APInt::tcXor(WordType *dst, const WordType *rhs, unsigned parts) {
2623   for (unsigned i = 0; i < parts; i++)
2624     dst[i] ^= rhs[i];
2625 }
2626 
2627 /* Complement a bignum in-place.  */
2628 void APInt::tcComplement(WordType *dst, unsigned parts) {
2629   for (unsigned i = 0; i < parts; i++)
2630     dst[i] = ~dst[i];
2631 }
2632 
2633 /* Comparison (unsigned) of two bignums.  */
2634 int APInt::tcCompare(const WordType *lhs, const WordType *rhs,
2635                      unsigned parts) {
2636   while (parts) {
2637     parts--;
2638     if (lhs[parts] != rhs[parts])
2639       return (lhs[parts] > rhs[parts]) ? 1 : -1;
2640   }
2641 
2642   return 0;
2643 }
2644 
2645 /* Set the least significant BITS bits of a bignum, clear the
2646    rest.  */
2647 void APInt::tcSetLeastSignificantBits(WordType *dst, unsigned parts,
2648                                       unsigned bits) {
2649   unsigned i = 0;
2650   while (bits > APINT_BITS_PER_WORD) {
2651     dst[i++] = ~(WordType) 0;
2652     bits -= APINT_BITS_PER_WORD;
2653   }
2654 
2655   if (bits)
2656     dst[i++] = ~(WordType) 0 >> (APINT_BITS_PER_WORD - bits);
2657 
2658   while (i < parts)
2659     dst[i++] = 0;
2660 }
2661