1 //===-- APInt.cpp - Implement APInt class ---------------------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements a class to represent arbitrary precision integer
11 // constant values and provide a variety of arithmetic operations on them.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "llvm/ADT/APInt.h"
16 #include "llvm/ADT/ArrayRef.h"
17 #include "llvm/ADT/FoldingSet.h"
18 #include "llvm/ADT/Hashing.h"
19 #include "llvm/ADT/Optional.h"
20 #include "llvm/ADT/SmallString.h"
21 #include "llvm/ADT/StringRef.h"
22 #include "llvm/Config/llvm-config.h"
23 #include "llvm/Support/Debug.h"
24 #include "llvm/Support/ErrorHandling.h"
25 #include "llvm/Support/MathExtras.h"
26 #include "llvm/Support/raw_ostream.h"
27 #include <climits>
28 #include <cmath>
29 #include <cstdlib>
30 #include <cstring>
31 using namespace llvm;
32 
33 #define DEBUG_TYPE "apint"
34 
35 /// A utility function for allocating memory, checking for allocation failures,
36 /// and ensuring the contents are zeroed.
37 inline static uint64_t* getClearedMemory(unsigned numWords) {
38   uint64_t *result = new uint64_t[numWords];
39   memset(result, 0, numWords * sizeof(uint64_t));
40   return result;
41 }
42 
43 /// A utility function for allocating memory and checking for allocation
44 /// failure.  The content is not zeroed.
45 inline static uint64_t* getMemory(unsigned numWords) {
46   return new uint64_t[numWords];
47 }
48 
49 /// A utility function that converts a character to a digit.
50 inline static unsigned getDigit(char cdigit, uint8_t radix) {
51   unsigned r;
52 
53   if (radix == 16 || radix == 36) {
54     r = cdigit - '0';
55     if (r <= 9)
56       return r;
57 
58     r = cdigit - 'A';
59     if (r <= radix - 11U)
60       return r + 10;
61 
62     r = cdigit - 'a';
63     if (r <= radix - 11U)
64       return r + 10;
65 
66     radix = 10;
67   }
68 
69   r = cdigit - '0';
70   if (r < radix)
71     return r;
72 
73   return -1U;
74 }
75 
76 
77 void APInt::initSlowCase(uint64_t val, bool isSigned) {
78   U.pVal = getClearedMemory(getNumWords());
79   U.pVal[0] = val;
80   if (isSigned && int64_t(val) < 0)
81     for (unsigned i = 1; i < getNumWords(); ++i)
82       U.pVal[i] = WORDTYPE_MAX;
83   clearUnusedBits();
84 }
85 
86 void APInt::initSlowCase(const APInt& that) {
87   U.pVal = getMemory(getNumWords());
88   memcpy(U.pVal, that.U.pVal, getNumWords() * APINT_WORD_SIZE);
89 }
90 
91 void APInt::initFromArray(ArrayRef<uint64_t> bigVal) {
92   assert(BitWidth && "Bitwidth too small");
93   assert(bigVal.data() && "Null pointer detected!");
94   if (isSingleWord())
95     U.VAL = bigVal[0];
96   else {
97     // Get memory, cleared to 0
98     U.pVal = getClearedMemory(getNumWords());
99     // Calculate the number of words to copy
100     unsigned words = std::min<unsigned>(bigVal.size(), getNumWords());
101     // Copy the words from bigVal to pVal
102     memcpy(U.pVal, bigVal.data(), words * APINT_WORD_SIZE);
103   }
104   // Make sure unused high bits are cleared
105   clearUnusedBits();
106 }
107 
108 APInt::APInt(unsigned numBits, ArrayRef<uint64_t> bigVal)
109   : BitWidth(numBits) {
110   initFromArray(bigVal);
111 }
112 
113 APInt::APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[])
114   : BitWidth(numBits) {
115   initFromArray(makeArrayRef(bigVal, numWords));
116 }
117 
118 APInt::APInt(unsigned numbits, StringRef Str, uint8_t radix)
119   : BitWidth(numbits) {
120   assert(BitWidth && "Bitwidth too small");
121   fromString(numbits, Str, radix);
122 }
123 
124 void APInt::reallocate(unsigned NewBitWidth) {
125   // If the number of words is the same we can just change the width and stop.
126   if (getNumWords() == getNumWords(NewBitWidth)) {
127     BitWidth = NewBitWidth;
128     return;
129   }
130 
131   // If we have an allocation, delete it.
132   if (!isSingleWord())
133     delete [] U.pVal;
134 
135   // Update BitWidth.
136   BitWidth = NewBitWidth;
137 
138   // If we are supposed to have an allocation, create it.
139   if (!isSingleWord())
140     U.pVal = getMemory(getNumWords());
141 }
142 
143 void APInt::AssignSlowCase(const APInt& RHS) {
144   // Don't do anything for X = X
145   if (this == &RHS)
146     return;
147 
148   // Adjust the bit width and handle allocations as necessary.
149   reallocate(RHS.getBitWidth());
150 
151   // Copy the data.
152   if (isSingleWord())
153     U.VAL = RHS.U.VAL;
154   else
155     memcpy(U.pVal, RHS.U.pVal, getNumWords() * APINT_WORD_SIZE);
156 }
157 
158 /// This method 'profiles' an APInt for use with FoldingSet.
159 void APInt::Profile(FoldingSetNodeID& ID) const {
160   ID.AddInteger(BitWidth);
161 
162   if (isSingleWord()) {
163     ID.AddInteger(U.VAL);
164     return;
165   }
166 
167   unsigned NumWords = getNumWords();
168   for (unsigned i = 0; i < NumWords; ++i)
169     ID.AddInteger(U.pVal[i]);
170 }
171 
172 /// Prefix increment operator. Increments the APInt by one.
173 APInt& APInt::operator++() {
174   if (isSingleWord())
175     ++U.VAL;
176   else
177     tcIncrement(U.pVal, getNumWords());
178   return clearUnusedBits();
179 }
180 
181 /// Prefix decrement operator. Decrements the APInt by one.
182 APInt& APInt::operator--() {
183   if (isSingleWord())
184     --U.VAL;
185   else
186     tcDecrement(U.pVal, getNumWords());
187   return clearUnusedBits();
188 }
189 
190 /// Adds the RHS APint to this APInt.
191 /// @returns this, after addition of RHS.
192 /// Addition assignment operator.
193 APInt& APInt::operator+=(const APInt& RHS) {
194   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
195   if (isSingleWord())
196     U.VAL += RHS.U.VAL;
197   else
198     tcAdd(U.pVal, RHS.U.pVal, 0, getNumWords());
199   return clearUnusedBits();
200 }
201 
202 APInt& APInt::operator+=(uint64_t RHS) {
203   if (isSingleWord())
204     U.VAL += RHS;
205   else
206     tcAddPart(U.pVal, RHS, getNumWords());
207   return clearUnusedBits();
208 }
209 
210 /// Subtracts the RHS APInt from this APInt
211 /// @returns this, after subtraction
212 /// Subtraction assignment operator.
213 APInt& APInt::operator-=(const APInt& RHS) {
214   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
215   if (isSingleWord())
216     U.VAL -= RHS.U.VAL;
217   else
218     tcSubtract(U.pVal, RHS.U.pVal, 0, getNumWords());
219   return clearUnusedBits();
220 }
221 
222 APInt& APInt::operator-=(uint64_t RHS) {
223   if (isSingleWord())
224     U.VAL -= RHS;
225   else
226     tcSubtractPart(U.pVal, RHS, getNumWords());
227   return clearUnusedBits();
228 }
229 
230 APInt APInt::operator*(const APInt& RHS) const {
231   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
232   if (isSingleWord())
233     return APInt(BitWidth, U.VAL * RHS.U.VAL);
234 
235   APInt Result(getMemory(getNumWords()), getBitWidth());
236 
237   tcMultiply(Result.U.pVal, U.pVal, RHS.U.pVal, getNumWords());
238 
239   Result.clearUnusedBits();
240   return Result;
241 }
242 
243 void APInt::AndAssignSlowCase(const APInt& RHS) {
244   tcAnd(U.pVal, RHS.U.pVal, getNumWords());
245 }
246 
247 void APInt::OrAssignSlowCase(const APInt& RHS) {
248   tcOr(U.pVal, RHS.U.pVal, getNumWords());
249 }
250 
251 void APInt::XorAssignSlowCase(const APInt& RHS) {
252   tcXor(U.pVal, RHS.U.pVal, getNumWords());
253 }
254 
255 APInt& APInt::operator*=(const APInt& RHS) {
256   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
257   *this = *this * RHS;
258   return *this;
259 }
260 
261 APInt& APInt::operator*=(uint64_t RHS) {
262   if (isSingleWord()) {
263     U.VAL *= RHS;
264   } else {
265     unsigned NumWords = getNumWords();
266     tcMultiplyPart(U.pVal, U.pVal, RHS, 0, NumWords, NumWords, false);
267   }
268   return clearUnusedBits();
269 }
270 
271 bool APInt::EqualSlowCase(const APInt& RHS) const {
272   return std::equal(U.pVal, U.pVal + getNumWords(), RHS.U.pVal);
273 }
274 
275 int APInt::compare(const APInt& RHS) const {
276   assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
277   if (isSingleWord())
278     return U.VAL < RHS.U.VAL ? -1 : U.VAL > RHS.U.VAL;
279 
280   return tcCompare(U.pVal, RHS.U.pVal, getNumWords());
281 }
282 
283 int APInt::compareSigned(const APInt& RHS) const {
284   assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
285   if (isSingleWord()) {
286     int64_t lhsSext = SignExtend64(U.VAL, BitWidth);
287     int64_t rhsSext = SignExtend64(RHS.U.VAL, BitWidth);
288     return lhsSext < rhsSext ? -1 : lhsSext > rhsSext;
289   }
290 
291   bool lhsNeg = isNegative();
292   bool rhsNeg = RHS.isNegative();
293 
294   // If the sign bits don't match, then (LHS < RHS) if LHS is negative
295   if (lhsNeg != rhsNeg)
296     return lhsNeg ? -1 : 1;
297 
298   // Otherwise we can just use an unsigned comparison, because even negative
299   // numbers compare correctly this way if both have the same signed-ness.
300   return tcCompare(U.pVal, RHS.U.pVal, getNumWords());
301 }
302 
303 void APInt::setBitsSlowCase(unsigned loBit, unsigned hiBit) {
304   unsigned loWord = whichWord(loBit);
305   unsigned hiWord = whichWord(hiBit);
306 
307   // Create an initial mask for the low word with zeros below loBit.
308   uint64_t loMask = WORDTYPE_MAX << whichBit(loBit);
309 
310   // If hiBit is not aligned, we need a high mask.
311   unsigned hiShiftAmt = whichBit(hiBit);
312   if (hiShiftAmt != 0) {
313     // Create a high mask with zeros above hiBit.
314     uint64_t hiMask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - hiShiftAmt);
315     // If loWord and hiWord are equal, then we combine the masks. Otherwise,
316     // set the bits in hiWord.
317     if (hiWord == loWord)
318       loMask &= hiMask;
319     else
320       U.pVal[hiWord] |= hiMask;
321   }
322   // Apply the mask to the low word.
323   U.pVal[loWord] |= loMask;
324 
325   // Fill any words between loWord and hiWord with all ones.
326   for (unsigned word = loWord + 1; word < hiWord; ++word)
327     U.pVal[word] = WORDTYPE_MAX;
328 }
329 
330 /// Toggle every bit to its opposite value.
331 void APInt::flipAllBitsSlowCase() {
332   tcComplement(U.pVal, getNumWords());
333   clearUnusedBits();
334 }
335 
336 /// Toggle a given bit to its opposite value whose position is given
337 /// as "bitPosition".
338 /// Toggles a given bit to its opposite value.
339 void APInt::flipBit(unsigned bitPosition) {
340   assert(bitPosition < BitWidth && "Out of the bit-width range!");
341   if ((*this)[bitPosition]) clearBit(bitPosition);
342   else setBit(bitPosition);
343 }
344 
345 void APInt::insertBits(const APInt &subBits, unsigned bitPosition) {
346   unsigned subBitWidth = subBits.getBitWidth();
347   assert(0 < subBitWidth && (subBitWidth + bitPosition) <= BitWidth &&
348          "Illegal bit insertion");
349 
350   // Insertion is a direct copy.
351   if (subBitWidth == BitWidth) {
352     *this = subBits;
353     return;
354   }
355 
356   // Single word result can be done as a direct bitmask.
357   if (isSingleWord()) {
358     uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - subBitWidth);
359     U.VAL &= ~(mask << bitPosition);
360     U.VAL |= (subBits.U.VAL << bitPosition);
361     return;
362   }
363 
364   unsigned loBit = whichBit(bitPosition);
365   unsigned loWord = whichWord(bitPosition);
366   unsigned hi1Word = whichWord(bitPosition + subBitWidth - 1);
367 
368   // Insertion within a single word can be done as a direct bitmask.
369   if (loWord == hi1Word) {
370     uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - subBitWidth);
371     U.pVal[loWord] &= ~(mask << loBit);
372     U.pVal[loWord] |= (subBits.U.VAL << loBit);
373     return;
374   }
375 
376   // Insert on word boundaries.
377   if (loBit == 0) {
378     // Direct copy whole words.
379     unsigned numWholeSubWords = subBitWidth / APINT_BITS_PER_WORD;
380     memcpy(U.pVal + loWord, subBits.getRawData(),
381            numWholeSubWords * APINT_WORD_SIZE);
382 
383     // Mask+insert remaining bits.
384     unsigned remainingBits = subBitWidth % APINT_BITS_PER_WORD;
385     if (remainingBits != 0) {
386       uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - remainingBits);
387       U.pVal[hi1Word] &= ~mask;
388       U.pVal[hi1Word] |= subBits.getWord(subBitWidth - 1);
389     }
390     return;
391   }
392 
393   // General case - set/clear individual bits in dst based on src.
394   // TODO - there is scope for optimization here, but at the moment this code
395   // path is barely used so prefer readability over performance.
396   for (unsigned i = 0; i != subBitWidth; ++i) {
397     if (subBits[i])
398       setBit(bitPosition + i);
399     else
400       clearBit(bitPosition + i);
401   }
402 }
403 
404 APInt APInt::extractBits(unsigned numBits, unsigned bitPosition) const {
405   assert(numBits > 0 && "Can't extract zero bits");
406   assert(bitPosition < BitWidth && (numBits + bitPosition) <= BitWidth &&
407          "Illegal bit extraction");
408 
409   if (isSingleWord())
410     return APInt(numBits, U.VAL >> bitPosition);
411 
412   unsigned loBit = whichBit(bitPosition);
413   unsigned loWord = whichWord(bitPosition);
414   unsigned hiWord = whichWord(bitPosition + numBits - 1);
415 
416   // Single word result extracting bits from a single word source.
417   if (loWord == hiWord)
418     return APInt(numBits, U.pVal[loWord] >> loBit);
419 
420   // Extracting bits that start on a source word boundary can be done
421   // as a fast memory copy.
422   if (loBit == 0)
423     return APInt(numBits, makeArrayRef(U.pVal + loWord, 1 + hiWord - loWord));
424 
425   // General case - shift + copy source words directly into place.
426   APInt Result(numBits, 0);
427   unsigned NumSrcWords = getNumWords();
428   unsigned NumDstWords = Result.getNumWords();
429 
430   uint64_t *DestPtr = Result.isSingleWord() ? &Result.U.VAL : Result.U.pVal;
431   for (unsigned word = 0; word < NumDstWords; ++word) {
432     uint64_t w0 = U.pVal[loWord + word];
433     uint64_t w1 =
434         (loWord + word + 1) < NumSrcWords ? U.pVal[loWord + word + 1] : 0;
435     DestPtr[word] = (w0 >> loBit) | (w1 << (APINT_BITS_PER_WORD - loBit));
436   }
437 
438   return Result.clearUnusedBits();
439 }
440 
441 unsigned APInt::getBitsNeeded(StringRef str, uint8_t radix) {
442   assert(!str.empty() && "Invalid string length");
443   assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 ||
444           radix == 36) &&
445          "Radix should be 2, 8, 10, 16, or 36!");
446 
447   size_t slen = str.size();
448 
449   // Each computation below needs to know if it's negative.
450   StringRef::iterator p = str.begin();
451   unsigned isNegative = *p == '-';
452   if (*p == '-' || *p == '+') {
453     p++;
454     slen--;
455     assert(slen && "String is only a sign, needs a value.");
456   }
457 
458   // For radixes of power-of-two values, the bits required is accurately and
459   // easily computed
460   if (radix == 2)
461     return slen + isNegative;
462   if (radix == 8)
463     return slen * 3 + isNegative;
464   if (radix == 16)
465     return slen * 4 + isNegative;
466 
467   // FIXME: base 36
468 
469   // This is grossly inefficient but accurate. We could probably do something
470   // with a computation of roughly slen*64/20 and then adjust by the value of
471   // the first few digits. But, I'm not sure how accurate that could be.
472 
473   // Compute a sufficient number of bits that is always large enough but might
474   // be too large. This avoids the assertion in the constructor. This
475   // calculation doesn't work appropriately for the numbers 0-9, so just use 4
476   // bits in that case.
477   unsigned sufficient
478     = radix == 10? (slen == 1 ? 4 : slen * 64/18)
479                  : (slen == 1 ? 7 : slen * 16/3);
480 
481   // Convert to the actual binary value.
482   APInt tmp(sufficient, StringRef(p, slen), radix);
483 
484   // Compute how many bits are required. If the log is infinite, assume we need
485   // just bit.
486   unsigned log = tmp.logBase2();
487   if (log == (unsigned)-1) {
488     return isNegative + 1;
489   } else {
490     return isNegative + log + 1;
491   }
492 }
493 
494 hash_code llvm::hash_value(const APInt &Arg) {
495   if (Arg.isSingleWord())
496     return hash_combine(Arg.U.VAL);
497 
498   return hash_combine_range(Arg.U.pVal, Arg.U.pVal + Arg.getNumWords());
499 }
500 
501 bool APInt::isSplat(unsigned SplatSizeInBits) const {
502   assert(getBitWidth() % SplatSizeInBits == 0 &&
503          "SplatSizeInBits must divide width!");
504   // We can check that all parts of an integer are equal by making use of a
505   // little trick: rotate and check if it's still the same value.
506   return *this == rotl(SplatSizeInBits);
507 }
508 
509 /// This function returns the high "numBits" bits of this APInt.
510 APInt APInt::getHiBits(unsigned numBits) const {
511   return this->lshr(BitWidth - numBits);
512 }
513 
514 /// This function returns the low "numBits" bits of this APInt.
515 APInt APInt::getLoBits(unsigned numBits) const {
516   APInt Result(getLowBitsSet(BitWidth, numBits));
517   Result &= *this;
518   return Result;
519 }
520 
521 /// Return a value containing V broadcasted over NewLen bits.
522 APInt APInt::getSplat(unsigned NewLen, const APInt &V) {
523   assert(NewLen >= V.getBitWidth() && "Can't splat to smaller bit width!");
524 
525   APInt Val = V.zextOrSelf(NewLen);
526   for (unsigned I = V.getBitWidth(); I < NewLen; I <<= 1)
527     Val |= Val << I;
528 
529   return Val;
530 }
531 
532 unsigned APInt::countLeadingZerosSlowCase() const {
533   unsigned Count = 0;
534   for (int i = getNumWords()-1; i >= 0; --i) {
535     uint64_t V = U.pVal[i];
536     if (V == 0)
537       Count += APINT_BITS_PER_WORD;
538     else {
539       Count += llvm::countLeadingZeros(V);
540       break;
541     }
542   }
543   // Adjust for unused bits in the most significant word (they are zero).
544   unsigned Mod = BitWidth % APINT_BITS_PER_WORD;
545   Count -= Mod > 0 ? APINT_BITS_PER_WORD - Mod : 0;
546   return Count;
547 }
548 
549 unsigned APInt::countLeadingOnesSlowCase() const {
550   unsigned highWordBits = BitWidth % APINT_BITS_PER_WORD;
551   unsigned shift;
552   if (!highWordBits) {
553     highWordBits = APINT_BITS_PER_WORD;
554     shift = 0;
555   } else {
556     shift = APINT_BITS_PER_WORD - highWordBits;
557   }
558   int i = getNumWords() - 1;
559   unsigned Count = llvm::countLeadingOnes(U.pVal[i] << shift);
560   if (Count == highWordBits) {
561     for (i--; i >= 0; --i) {
562       if (U.pVal[i] == WORDTYPE_MAX)
563         Count += APINT_BITS_PER_WORD;
564       else {
565         Count += llvm::countLeadingOnes(U.pVal[i]);
566         break;
567       }
568     }
569   }
570   return Count;
571 }
572 
573 unsigned APInt::countTrailingZerosSlowCase() const {
574   unsigned Count = 0;
575   unsigned i = 0;
576   for (; i < getNumWords() && U.pVal[i] == 0; ++i)
577     Count += APINT_BITS_PER_WORD;
578   if (i < getNumWords())
579     Count += llvm::countTrailingZeros(U.pVal[i]);
580   return std::min(Count, BitWidth);
581 }
582 
583 unsigned APInt::countTrailingOnesSlowCase() const {
584   unsigned Count = 0;
585   unsigned i = 0;
586   for (; i < getNumWords() && U.pVal[i] == WORDTYPE_MAX; ++i)
587     Count += APINT_BITS_PER_WORD;
588   if (i < getNumWords())
589     Count += llvm::countTrailingOnes(U.pVal[i]);
590   assert(Count <= BitWidth);
591   return Count;
592 }
593 
594 unsigned APInt::countPopulationSlowCase() const {
595   unsigned Count = 0;
596   for (unsigned i = 0; i < getNumWords(); ++i)
597     Count += llvm::countPopulation(U.pVal[i]);
598   return Count;
599 }
600 
601 bool APInt::intersectsSlowCase(const APInt &RHS) const {
602   for (unsigned i = 0, e = getNumWords(); i != e; ++i)
603     if ((U.pVal[i] & RHS.U.pVal[i]) != 0)
604       return true;
605 
606   return false;
607 }
608 
609 bool APInt::isSubsetOfSlowCase(const APInt &RHS) const {
610   for (unsigned i = 0, e = getNumWords(); i != e; ++i)
611     if ((U.pVal[i] & ~RHS.U.pVal[i]) != 0)
612       return false;
613 
614   return true;
615 }
616 
617 APInt APInt::byteSwap() const {
618   assert(BitWidth >= 16 && BitWidth % 16 == 0 && "Cannot byteswap!");
619   if (BitWidth == 16)
620     return APInt(BitWidth, ByteSwap_16(uint16_t(U.VAL)));
621   if (BitWidth == 32)
622     return APInt(BitWidth, ByteSwap_32(unsigned(U.VAL)));
623   if (BitWidth == 48) {
624     unsigned Tmp1 = unsigned(U.VAL >> 16);
625     Tmp1 = ByteSwap_32(Tmp1);
626     uint16_t Tmp2 = uint16_t(U.VAL);
627     Tmp2 = ByteSwap_16(Tmp2);
628     return APInt(BitWidth, (uint64_t(Tmp2) << 32) | Tmp1);
629   }
630   if (BitWidth == 64)
631     return APInt(BitWidth, ByteSwap_64(U.VAL));
632 
633   APInt Result(getNumWords() * APINT_BITS_PER_WORD, 0);
634   for (unsigned I = 0, N = getNumWords(); I != N; ++I)
635     Result.U.pVal[I] = ByteSwap_64(U.pVal[N - I - 1]);
636   if (Result.BitWidth != BitWidth) {
637     Result.lshrInPlace(Result.BitWidth - BitWidth);
638     Result.BitWidth = BitWidth;
639   }
640   return Result;
641 }
642 
643 APInt APInt::reverseBits() const {
644   switch (BitWidth) {
645   case 64:
646     return APInt(BitWidth, llvm::reverseBits<uint64_t>(U.VAL));
647   case 32:
648     return APInt(BitWidth, llvm::reverseBits<uint32_t>(U.VAL));
649   case 16:
650     return APInt(BitWidth, llvm::reverseBits<uint16_t>(U.VAL));
651   case 8:
652     return APInt(BitWidth, llvm::reverseBits<uint8_t>(U.VAL));
653   default:
654     break;
655   }
656 
657   APInt Val(*this);
658   APInt Reversed(BitWidth, 0);
659   unsigned S = BitWidth;
660 
661   for (; Val != 0; Val.lshrInPlace(1)) {
662     Reversed <<= 1;
663     Reversed |= Val[0];
664     --S;
665   }
666 
667   Reversed <<= S;
668   return Reversed;
669 }
670 
671 APInt llvm::APIntOps::GreatestCommonDivisor(APInt A, APInt B) {
672   // Fast-path a common case.
673   if (A == B) return A;
674 
675   // Corner cases: if either operand is zero, the other is the gcd.
676   if (!A) return B;
677   if (!B) return A;
678 
679   // Count common powers of 2 and remove all other powers of 2.
680   unsigned Pow2;
681   {
682     unsigned Pow2_A = A.countTrailingZeros();
683     unsigned Pow2_B = B.countTrailingZeros();
684     if (Pow2_A > Pow2_B) {
685       A.lshrInPlace(Pow2_A - Pow2_B);
686       Pow2 = Pow2_B;
687     } else if (Pow2_B > Pow2_A) {
688       B.lshrInPlace(Pow2_B - Pow2_A);
689       Pow2 = Pow2_A;
690     } else {
691       Pow2 = Pow2_A;
692     }
693   }
694 
695   // Both operands are odd multiples of 2^Pow_2:
696   //
697   //   gcd(a, b) = gcd(|a - b| / 2^i, min(a, b))
698   //
699   // This is a modified version of Stein's algorithm, taking advantage of
700   // efficient countTrailingZeros().
701   while (A != B) {
702     if (A.ugt(B)) {
703       A -= B;
704       A.lshrInPlace(A.countTrailingZeros() - Pow2);
705     } else {
706       B -= A;
707       B.lshrInPlace(B.countTrailingZeros() - Pow2);
708     }
709   }
710 
711   return A;
712 }
713 
714 APInt llvm::APIntOps::RoundDoubleToAPInt(double Double, unsigned width) {
715   union {
716     double D;
717     uint64_t I;
718   } T;
719   T.D = Double;
720 
721   // Get the sign bit from the highest order bit
722   bool isNeg = T.I >> 63;
723 
724   // Get the 11-bit exponent and adjust for the 1023 bit bias
725   int64_t exp = ((T.I >> 52) & 0x7ff) - 1023;
726 
727   // If the exponent is negative, the value is < 0 so just return 0.
728   if (exp < 0)
729     return APInt(width, 0u);
730 
731   // Extract the mantissa by clearing the top 12 bits (sign + exponent).
732   uint64_t mantissa = (T.I & (~0ULL >> 12)) | 1ULL << 52;
733 
734   // If the exponent doesn't shift all bits out of the mantissa
735   if (exp < 52)
736     return isNeg ? -APInt(width, mantissa >> (52 - exp)) :
737                     APInt(width, mantissa >> (52 - exp));
738 
739   // If the client didn't provide enough bits for us to shift the mantissa into
740   // then the result is undefined, just return 0
741   if (width <= exp - 52)
742     return APInt(width, 0);
743 
744   // Otherwise, we have to shift the mantissa bits up to the right location
745   APInt Tmp(width, mantissa);
746   Tmp <<= (unsigned)exp - 52;
747   return isNeg ? -Tmp : Tmp;
748 }
749 
750 /// This function converts this APInt to a double.
751 /// The layout for double is as following (IEEE Standard 754):
752 ///  --------------------------------------
753 /// |  Sign    Exponent    Fraction    Bias |
754 /// |-------------------------------------- |
755 /// |  1[63]   11[62-52]   52[51-00]   1023 |
756 ///  --------------------------------------
757 double APInt::roundToDouble(bool isSigned) const {
758 
759   // Handle the simple case where the value is contained in one uint64_t.
760   // It is wrong to optimize getWord(0) to VAL; there might be more than one word.
761   if (isSingleWord() || getActiveBits() <= APINT_BITS_PER_WORD) {
762     if (isSigned) {
763       int64_t sext = SignExtend64(getWord(0), BitWidth);
764       return double(sext);
765     } else
766       return double(getWord(0));
767   }
768 
769   // Determine if the value is negative.
770   bool isNeg = isSigned ? (*this)[BitWidth-1] : false;
771 
772   // Construct the absolute value if we're negative.
773   APInt Tmp(isNeg ? -(*this) : (*this));
774 
775   // Figure out how many bits we're using.
776   unsigned n = Tmp.getActiveBits();
777 
778   // The exponent (without bias normalization) is just the number of bits
779   // we are using. Note that the sign bit is gone since we constructed the
780   // absolute value.
781   uint64_t exp = n;
782 
783   // Return infinity for exponent overflow
784   if (exp > 1023) {
785     if (!isSigned || !isNeg)
786       return std::numeric_limits<double>::infinity();
787     else
788       return -std::numeric_limits<double>::infinity();
789   }
790   exp += 1023; // Increment for 1023 bias
791 
792   // Number of bits in mantissa is 52. To obtain the mantissa value, we must
793   // extract the high 52 bits from the correct words in pVal.
794   uint64_t mantissa;
795   unsigned hiWord = whichWord(n-1);
796   if (hiWord == 0) {
797     mantissa = Tmp.U.pVal[0];
798     if (n > 52)
799       mantissa >>= n - 52; // shift down, we want the top 52 bits.
800   } else {
801     assert(hiWord > 0 && "huh?");
802     uint64_t hibits = Tmp.U.pVal[hiWord] << (52 - n % APINT_BITS_PER_WORD);
803     uint64_t lobits = Tmp.U.pVal[hiWord-1] >> (11 + n % APINT_BITS_PER_WORD);
804     mantissa = hibits | lobits;
805   }
806 
807   // The leading bit of mantissa is implicit, so get rid of it.
808   uint64_t sign = isNeg ? (1ULL << (APINT_BITS_PER_WORD - 1)) : 0;
809   union {
810     double D;
811     uint64_t I;
812   } T;
813   T.I = sign | (exp << 52) | mantissa;
814   return T.D;
815 }
816 
817 // Truncate to new width.
818 APInt APInt::trunc(unsigned width) const {
819   assert(width < BitWidth && "Invalid APInt Truncate request");
820   assert(width && "Can't truncate to 0 bits");
821 
822   if (width <= APINT_BITS_PER_WORD)
823     return APInt(width, getRawData()[0]);
824 
825   APInt Result(getMemory(getNumWords(width)), width);
826 
827   // Copy full words.
828   unsigned i;
829   for (i = 0; i != width / APINT_BITS_PER_WORD; i++)
830     Result.U.pVal[i] = U.pVal[i];
831 
832   // Truncate and copy any partial word.
833   unsigned bits = (0 - width) % APINT_BITS_PER_WORD;
834   if (bits != 0)
835     Result.U.pVal[i] = U.pVal[i] << bits >> bits;
836 
837   return Result;
838 }
839 
840 // Sign extend to a new width.
841 APInt APInt::sext(unsigned Width) const {
842   assert(Width > BitWidth && "Invalid APInt SignExtend request");
843 
844   if (Width <= APINT_BITS_PER_WORD)
845     return APInt(Width, SignExtend64(U.VAL, BitWidth));
846 
847   APInt Result(getMemory(getNumWords(Width)), Width);
848 
849   // Copy words.
850   std::memcpy(Result.U.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE);
851 
852   // Sign extend the last word since there may be unused bits in the input.
853   Result.U.pVal[getNumWords() - 1] =
854       SignExtend64(Result.U.pVal[getNumWords() - 1],
855                    ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1);
856 
857   // Fill with sign bits.
858   std::memset(Result.U.pVal + getNumWords(), isNegative() ? -1 : 0,
859               (Result.getNumWords() - getNumWords()) * APINT_WORD_SIZE);
860   Result.clearUnusedBits();
861   return Result;
862 }
863 
864 //  Zero extend to a new width.
865 APInt APInt::zext(unsigned width) const {
866   assert(width > BitWidth && "Invalid APInt ZeroExtend request");
867 
868   if (width <= APINT_BITS_PER_WORD)
869     return APInt(width, U.VAL);
870 
871   APInt Result(getMemory(getNumWords(width)), width);
872 
873   // Copy words.
874   std::memcpy(Result.U.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE);
875 
876   // Zero remaining words.
877   std::memset(Result.U.pVal + getNumWords(), 0,
878               (Result.getNumWords() - getNumWords()) * APINT_WORD_SIZE);
879 
880   return Result;
881 }
882 
883 APInt APInt::zextOrTrunc(unsigned width) const {
884   if (BitWidth < width)
885     return zext(width);
886   if (BitWidth > width)
887     return trunc(width);
888   return *this;
889 }
890 
891 APInt APInt::sextOrTrunc(unsigned width) const {
892   if (BitWidth < width)
893     return sext(width);
894   if (BitWidth > width)
895     return trunc(width);
896   return *this;
897 }
898 
899 APInt APInt::zextOrSelf(unsigned width) const {
900   if (BitWidth < width)
901     return zext(width);
902   return *this;
903 }
904 
905 APInt APInt::sextOrSelf(unsigned width) const {
906   if (BitWidth < width)
907     return sext(width);
908   return *this;
909 }
910 
911 /// Arithmetic right-shift this APInt by shiftAmt.
912 /// Arithmetic right-shift function.
913 void APInt::ashrInPlace(const APInt &shiftAmt) {
914   ashrInPlace((unsigned)shiftAmt.getLimitedValue(BitWidth));
915 }
916 
917 /// Arithmetic right-shift this APInt by shiftAmt.
918 /// Arithmetic right-shift function.
919 void APInt::ashrSlowCase(unsigned ShiftAmt) {
920   // Don't bother performing a no-op shift.
921   if (!ShiftAmt)
922     return;
923 
924   // Save the original sign bit for later.
925   bool Negative = isNegative();
926 
927   // WordShift is the inter-part shift; BitShift is intra-part shift.
928   unsigned WordShift = ShiftAmt / APINT_BITS_PER_WORD;
929   unsigned BitShift = ShiftAmt % APINT_BITS_PER_WORD;
930 
931   unsigned WordsToMove = getNumWords() - WordShift;
932   if (WordsToMove != 0) {
933     // Sign extend the last word to fill in the unused bits.
934     U.pVal[getNumWords() - 1] = SignExtend64(
935         U.pVal[getNumWords() - 1], ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1);
936 
937     // Fastpath for moving by whole words.
938     if (BitShift == 0) {
939       std::memmove(U.pVal, U.pVal + WordShift, WordsToMove * APINT_WORD_SIZE);
940     } else {
941       // Move the words containing significant bits.
942       for (unsigned i = 0; i != WordsToMove - 1; ++i)
943         U.pVal[i] = (U.pVal[i + WordShift] >> BitShift) |
944                     (U.pVal[i + WordShift + 1] << (APINT_BITS_PER_WORD - BitShift));
945 
946       // Handle the last word which has no high bits to copy.
947       U.pVal[WordsToMove - 1] = U.pVal[WordShift + WordsToMove - 1] >> BitShift;
948       // Sign extend one more time.
949       U.pVal[WordsToMove - 1] =
950           SignExtend64(U.pVal[WordsToMove - 1], APINT_BITS_PER_WORD - BitShift);
951     }
952   }
953 
954   // Fill in the remainder based on the original sign.
955   std::memset(U.pVal + WordsToMove, Negative ? -1 : 0,
956               WordShift * APINT_WORD_SIZE);
957   clearUnusedBits();
958 }
959 
960 /// Logical right-shift this APInt by shiftAmt.
961 /// Logical right-shift function.
962 void APInt::lshrInPlace(const APInt &shiftAmt) {
963   lshrInPlace((unsigned)shiftAmt.getLimitedValue(BitWidth));
964 }
965 
966 /// Logical right-shift this APInt by shiftAmt.
967 /// Logical right-shift function.
968 void APInt::lshrSlowCase(unsigned ShiftAmt) {
969   tcShiftRight(U.pVal, getNumWords(), ShiftAmt);
970 }
971 
972 /// Left-shift this APInt by shiftAmt.
973 /// Left-shift function.
974 APInt &APInt::operator<<=(const APInt &shiftAmt) {
975   // It's undefined behavior in C to shift by BitWidth or greater.
976   *this <<= (unsigned)shiftAmt.getLimitedValue(BitWidth);
977   return *this;
978 }
979 
980 void APInt::shlSlowCase(unsigned ShiftAmt) {
981   tcShiftLeft(U.pVal, getNumWords(), ShiftAmt);
982   clearUnusedBits();
983 }
984 
985 // Calculate the rotate amount modulo the bit width.
986 static unsigned rotateModulo(unsigned BitWidth, const APInt &rotateAmt) {
987   unsigned rotBitWidth = rotateAmt.getBitWidth();
988   APInt rot = rotateAmt;
989   if (rotBitWidth < BitWidth) {
990     // Extend the rotate APInt, so that the urem doesn't divide by 0.
991     // e.g. APInt(1, 32) would give APInt(1, 0).
992     rot = rotateAmt.zext(BitWidth);
993   }
994   rot = rot.urem(APInt(rot.getBitWidth(), BitWidth));
995   return rot.getLimitedValue(BitWidth);
996 }
997 
998 APInt APInt::rotl(const APInt &rotateAmt) const {
999   return rotl(rotateModulo(BitWidth, rotateAmt));
1000 }
1001 
1002 APInt APInt::rotl(unsigned rotateAmt) const {
1003   rotateAmt %= BitWidth;
1004   if (rotateAmt == 0)
1005     return *this;
1006   return shl(rotateAmt) | lshr(BitWidth - rotateAmt);
1007 }
1008 
1009 APInt APInt::rotr(const APInt &rotateAmt) const {
1010   return rotr(rotateModulo(BitWidth, rotateAmt));
1011 }
1012 
1013 APInt APInt::rotr(unsigned rotateAmt) const {
1014   rotateAmt %= BitWidth;
1015   if (rotateAmt == 0)
1016     return *this;
1017   return lshr(rotateAmt) | shl(BitWidth - rotateAmt);
1018 }
1019 
1020 // Square Root - this method computes and returns the square root of "this".
1021 // Three mechanisms are used for computation. For small values (<= 5 bits),
1022 // a table lookup is done. This gets some performance for common cases. For
1023 // values using less than 52 bits, the value is converted to double and then
1024 // the libc sqrt function is called. The result is rounded and then converted
1025 // back to a uint64_t which is then used to construct the result. Finally,
1026 // the Babylonian method for computing square roots is used.
1027 APInt APInt::sqrt() const {
1028 
1029   // Determine the magnitude of the value.
1030   unsigned magnitude = getActiveBits();
1031 
1032   // Use a fast table for some small values. This also gets rid of some
1033   // rounding errors in libc sqrt for small values.
1034   if (magnitude <= 5) {
1035     static const uint8_t results[32] = {
1036       /*     0 */ 0,
1037       /*  1- 2 */ 1, 1,
1038       /*  3- 6 */ 2, 2, 2, 2,
1039       /*  7-12 */ 3, 3, 3, 3, 3, 3,
1040       /* 13-20 */ 4, 4, 4, 4, 4, 4, 4, 4,
1041       /* 21-30 */ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1042       /*    31 */ 6
1043     };
1044     return APInt(BitWidth, results[ (isSingleWord() ? U.VAL : U.pVal[0]) ]);
1045   }
1046 
1047   // If the magnitude of the value fits in less than 52 bits (the precision of
1048   // an IEEE double precision floating point value), then we can use the
1049   // libc sqrt function which will probably use a hardware sqrt computation.
1050   // This should be faster than the algorithm below.
1051   if (magnitude < 52) {
1052     return APInt(BitWidth,
1053                  uint64_t(::round(::sqrt(double(isSingleWord() ? U.VAL
1054                                                                : U.pVal[0])))));
1055   }
1056 
1057   // Okay, all the short cuts are exhausted. We must compute it. The following
1058   // is a classical Babylonian method for computing the square root. This code
1059   // was adapted to APInt from a wikipedia article on such computations.
1060   // See http://www.wikipedia.org/ and go to the page named
1061   // Calculate_an_integer_square_root.
1062   unsigned nbits = BitWidth, i = 4;
1063   APInt testy(BitWidth, 16);
1064   APInt x_old(BitWidth, 1);
1065   APInt x_new(BitWidth, 0);
1066   APInt two(BitWidth, 2);
1067 
1068   // Select a good starting value using binary logarithms.
1069   for (;; i += 2, testy = testy.shl(2))
1070     if (i >= nbits || this->ule(testy)) {
1071       x_old = x_old.shl(i / 2);
1072       break;
1073     }
1074 
1075   // Use the Babylonian method to arrive at the integer square root:
1076   for (;;) {
1077     x_new = (this->udiv(x_old) + x_old).udiv(two);
1078     if (x_old.ule(x_new))
1079       break;
1080     x_old = x_new;
1081   }
1082 
1083   // Make sure we return the closest approximation
1084   // NOTE: The rounding calculation below is correct. It will produce an
1085   // off-by-one discrepancy with results from pari/gp. That discrepancy has been
1086   // determined to be a rounding issue with pari/gp as it begins to use a
1087   // floating point representation after 192 bits. There are no discrepancies
1088   // between this algorithm and pari/gp for bit widths < 192 bits.
1089   APInt square(x_old * x_old);
1090   APInt nextSquare((x_old + 1) * (x_old +1));
1091   if (this->ult(square))
1092     return x_old;
1093   assert(this->ule(nextSquare) && "Error in APInt::sqrt computation");
1094   APInt midpoint((nextSquare - square).udiv(two));
1095   APInt offset(*this - square);
1096   if (offset.ult(midpoint))
1097     return x_old;
1098   return x_old + 1;
1099 }
1100 
1101 /// Computes the multiplicative inverse of this APInt for a given modulo. The
1102 /// iterative extended Euclidean algorithm is used to solve for this value,
1103 /// however we simplify it to speed up calculating only the inverse, and take
1104 /// advantage of div+rem calculations. We also use some tricks to avoid copying
1105 /// (potentially large) APInts around.
1106 APInt APInt::multiplicativeInverse(const APInt& modulo) const {
1107   assert(ult(modulo) && "This APInt must be smaller than the modulo");
1108 
1109   // Using the properties listed at the following web page (accessed 06/21/08):
1110   //   http://www.numbertheory.org/php/euclid.html
1111   // (especially the properties numbered 3, 4 and 9) it can be proved that
1112   // BitWidth bits suffice for all the computations in the algorithm implemented
1113   // below. More precisely, this number of bits suffice if the multiplicative
1114   // inverse exists, but may not suffice for the general extended Euclidean
1115   // algorithm.
1116 
1117   APInt r[2] = { modulo, *this };
1118   APInt t[2] = { APInt(BitWidth, 0), APInt(BitWidth, 1) };
1119   APInt q(BitWidth, 0);
1120 
1121   unsigned i;
1122   for (i = 0; r[i^1] != 0; i ^= 1) {
1123     // An overview of the math without the confusing bit-flipping:
1124     // q = r[i-2] / r[i-1]
1125     // r[i] = r[i-2] % r[i-1]
1126     // t[i] = t[i-2] - t[i-1] * q
1127     udivrem(r[i], r[i^1], q, r[i]);
1128     t[i] -= t[i^1] * q;
1129   }
1130 
1131   // If this APInt and the modulo are not coprime, there is no multiplicative
1132   // inverse, so return 0. We check this by looking at the next-to-last
1133   // remainder, which is the gcd(*this,modulo) as calculated by the Euclidean
1134   // algorithm.
1135   if (r[i] != 1)
1136     return APInt(BitWidth, 0);
1137 
1138   // The next-to-last t is the multiplicative inverse.  However, we are
1139   // interested in a positive inverse. Calculate a positive one from a negative
1140   // one if necessary. A simple addition of the modulo suffices because
1141   // abs(t[i]) is known to be less than *this/2 (see the link above).
1142   if (t[i].isNegative())
1143     t[i] += modulo;
1144 
1145   return std::move(t[i]);
1146 }
1147 
1148 /// Calculate the magic numbers required to implement a signed integer division
1149 /// by a constant as a sequence of multiplies, adds and shifts.  Requires that
1150 /// the divisor not be 0, 1, or -1.  Taken from "Hacker's Delight", Henry S.
1151 /// Warren, Jr., chapter 10.
1152 APInt::ms APInt::magic() const {
1153   const APInt& d = *this;
1154   unsigned p;
1155   APInt ad, anc, delta, q1, r1, q2, r2, t;
1156   APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
1157   struct ms mag;
1158 
1159   ad = d.abs();
1160   t = signedMin + (d.lshr(d.getBitWidth() - 1));
1161   anc = t - 1 - t.urem(ad);   // absolute value of nc
1162   p = d.getBitWidth() - 1;    // initialize p
1163   q1 = signedMin.udiv(anc);   // initialize q1 = 2p/abs(nc)
1164   r1 = signedMin - q1*anc;    // initialize r1 = rem(2p,abs(nc))
1165   q2 = signedMin.udiv(ad);    // initialize q2 = 2p/abs(d)
1166   r2 = signedMin - q2*ad;     // initialize r2 = rem(2p,abs(d))
1167   do {
1168     p = p + 1;
1169     q1 = q1<<1;          // update q1 = 2p/abs(nc)
1170     r1 = r1<<1;          // update r1 = rem(2p/abs(nc))
1171     if (r1.uge(anc)) {  // must be unsigned comparison
1172       q1 = q1 + 1;
1173       r1 = r1 - anc;
1174     }
1175     q2 = q2<<1;          // update q2 = 2p/abs(d)
1176     r2 = r2<<1;          // update r2 = rem(2p/abs(d))
1177     if (r2.uge(ad)) {   // must be unsigned comparison
1178       q2 = q2 + 1;
1179       r2 = r2 - ad;
1180     }
1181     delta = ad - r2;
1182   } while (q1.ult(delta) || (q1 == delta && r1 == 0));
1183 
1184   mag.m = q2 + 1;
1185   if (d.isNegative()) mag.m = -mag.m;   // resulting magic number
1186   mag.s = p - d.getBitWidth();          // resulting shift
1187   return mag;
1188 }
1189 
1190 /// Calculate the magic numbers required to implement an unsigned integer
1191 /// division by a constant as a sequence of multiplies, adds and shifts.
1192 /// Requires that the divisor not be 0.  Taken from "Hacker's Delight", Henry
1193 /// S. Warren, Jr., chapter 10.
1194 /// LeadingZeros can be used to simplify the calculation if the upper bits
1195 /// of the divided value are known zero.
1196 APInt::mu APInt::magicu(unsigned LeadingZeros) const {
1197   const APInt& d = *this;
1198   unsigned p;
1199   APInt nc, delta, q1, r1, q2, r2;
1200   struct mu magu;
1201   magu.a = 0;               // initialize "add" indicator
1202   APInt allOnes = APInt::getAllOnesValue(d.getBitWidth()).lshr(LeadingZeros);
1203   APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
1204   APInt signedMax = APInt::getSignedMaxValue(d.getBitWidth());
1205 
1206   nc = allOnes - (allOnes - d).urem(d);
1207   p = d.getBitWidth() - 1;  // initialize p
1208   q1 = signedMin.udiv(nc);  // initialize q1 = 2p/nc
1209   r1 = signedMin - q1*nc;   // initialize r1 = rem(2p,nc)
1210   q2 = signedMax.udiv(d);   // initialize q2 = (2p-1)/d
1211   r2 = signedMax - q2*d;    // initialize r2 = rem((2p-1),d)
1212   do {
1213     p = p + 1;
1214     if (r1.uge(nc - r1)) {
1215       q1 = q1 + q1 + 1;  // update q1
1216       r1 = r1 + r1 - nc; // update r1
1217     }
1218     else {
1219       q1 = q1+q1; // update q1
1220       r1 = r1+r1; // update r1
1221     }
1222     if ((r2 + 1).uge(d - r2)) {
1223       if (q2.uge(signedMax)) magu.a = 1;
1224       q2 = q2+q2 + 1;     // update q2
1225       r2 = r2+r2 + 1 - d; // update r2
1226     }
1227     else {
1228       if (q2.uge(signedMin)) magu.a = 1;
1229       q2 = q2+q2;     // update q2
1230       r2 = r2+r2 + 1; // update r2
1231     }
1232     delta = d - 1 - r2;
1233   } while (p < d.getBitWidth()*2 &&
1234            (q1.ult(delta) || (q1 == delta && r1 == 0)));
1235   magu.m = q2 + 1; // resulting magic number
1236   magu.s = p - d.getBitWidth();  // resulting shift
1237   return magu;
1238 }
1239 
1240 /// Implementation of Knuth's Algorithm D (Division of nonnegative integers)
1241 /// from "Art of Computer Programming, Volume 2", section 4.3.1, p. 272. The
1242 /// variables here have the same names as in the algorithm. Comments explain
1243 /// the algorithm and any deviation from it.
1244 static void KnuthDiv(uint32_t *u, uint32_t *v, uint32_t *q, uint32_t* r,
1245                      unsigned m, unsigned n) {
1246   assert(u && "Must provide dividend");
1247   assert(v && "Must provide divisor");
1248   assert(q && "Must provide quotient");
1249   assert(u != v && u != q && v != q && "Must use different memory");
1250   assert(n>1 && "n must be > 1");
1251 
1252   // b denotes the base of the number system. In our case b is 2^32.
1253   const uint64_t b = uint64_t(1) << 32;
1254 
1255 // The DEBUG macros here tend to be spam in the debug output if you're not
1256 // debugging this code. Disable them unless KNUTH_DEBUG is defined.
1257 #ifdef KNUTH_DEBUG
1258 #define DEBUG_KNUTH(X) LLVM_DEBUG(X)
1259 #else
1260 #define DEBUG_KNUTH(X) do {} while(false)
1261 #endif
1262 
1263   DEBUG_KNUTH(dbgs() << "KnuthDiv: m=" << m << " n=" << n << '\n');
1264   DEBUG_KNUTH(dbgs() << "KnuthDiv: original:");
1265   DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1266   DEBUG_KNUTH(dbgs() << " by");
1267   DEBUG_KNUTH(for (int i = n; i > 0; i--) dbgs() << " " << v[i - 1]);
1268   DEBUG_KNUTH(dbgs() << '\n');
1269   // D1. [Normalize.] Set d = b / (v[n-1] + 1) and multiply all the digits of
1270   // u and v by d. Note that we have taken Knuth's advice here to use a power
1271   // of 2 value for d such that d * v[n-1] >= b/2 (b is the base). A power of
1272   // 2 allows us to shift instead of multiply and it is easy to determine the
1273   // shift amount from the leading zeros.  We are basically normalizing the u
1274   // and v so that its high bits are shifted to the top of v's range without
1275   // overflow. Note that this can require an extra word in u so that u must
1276   // be of length m+n+1.
1277   unsigned shift = countLeadingZeros(v[n-1]);
1278   uint32_t v_carry = 0;
1279   uint32_t u_carry = 0;
1280   if (shift) {
1281     for (unsigned i = 0; i < m+n; ++i) {
1282       uint32_t u_tmp = u[i] >> (32 - shift);
1283       u[i] = (u[i] << shift) | u_carry;
1284       u_carry = u_tmp;
1285     }
1286     for (unsigned i = 0; i < n; ++i) {
1287       uint32_t v_tmp = v[i] >> (32 - shift);
1288       v[i] = (v[i] << shift) | v_carry;
1289       v_carry = v_tmp;
1290     }
1291   }
1292   u[m+n] = u_carry;
1293 
1294   DEBUG_KNUTH(dbgs() << "KnuthDiv:   normal:");
1295   DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1296   DEBUG_KNUTH(dbgs() << " by");
1297   DEBUG_KNUTH(for (int i = n; i > 0; i--) dbgs() << " " << v[i - 1]);
1298   DEBUG_KNUTH(dbgs() << '\n');
1299 
1300   // D2. [Initialize j.]  Set j to m. This is the loop counter over the places.
1301   int j = m;
1302   do {
1303     DEBUG_KNUTH(dbgs() << "KnuthDiv: quotient digit #" << j << '\n');
1304     // D3. [Calculate q'.].
1305     //     Set qp = (u[j+n]*b + u[j+n-1]) / v[n-1]. (qp=qprime=q')
1306     //     Set rp = (u[j+n]*b + u[j+n-1]) % v[n-1]. (rp=rprime=r')
1307     // Now test if qp == b or qp*v[n-2] > b*rp + u[j+n-2]; if so, decrease
1308     // qp by 1, increase rp by v[n-1], and repeat this test if rp < b. The test
1309     // on v[n-2] determines at high speed most of the cases in which the trial
1310     // value qp is one too large, and it eliminates all cases where qp is two
1311     // too large.
1312     uint64_t dividend = Make_64(u[j+n], u[j+n-1]);
1313     DEBUG_KNUTH(dbgs() << "KnuthDiv: dividend == " << dividend << '\n');
1314     uint64_t qp = dividend / v[n-1];
1315     uint64_t rp = dividend % v[n-1];
1316     if (qp == b || qp*v[n-2] > b*rp + u[j+n-2]) {
1317       qp--;
1318       rp += v[n-1];
1319       if (rp < b && (qp == b || qp*v[n-2] > b*rp + u[j+n-2]))
1320         qp--;
1321     }
1322     DEBUG_KNUTH(dbgs() << "KnuthDiv: qp == " << qp << ", rp == " << rp << '\n');
1323 
1324     // D4. [Multiply and subtract.] Replace (u[j+n]u[j+n-1]...u[j]) with
1325     // (u[j+n]u[j+n-1]..u[j]) - qp * (v[n-1]...v[1]v[0]). This computation
1326     // consists of a simple multiplication by a one-place number, combined with
1327     // a subtraction.
1328     // The digits (u[j+n]...u[j]) should be kept positive; if the result of
1329     // this step is actually negative, (u[j+n]...u[j]) should be left as the
1330     // true value plus b**(n+1), namely as the b's complement of
1331     // the true value, and a "borrow" to the left should be remembered.
1332     int64_t borrow = 0;
1333     for (unsigned i = 0; i < n; ++i) {
1334       uint64_t p = uint64_t(qp) * uint64_t(v[i]);
1335       int64_t subres = int64_t(u[j+i]) - borrow - Lo_32(p);
1336       u[j+i] = Lo_32(subres);
1337       borrow = Hi_32(p) - Hi_32(subres);
1338       DEBUG_KNUTH(dbgs() << "KnuthDiv: u[j+i] = " << u[j + i]
1339                         << ", borrow = " << borrow << '\n');
1340     }
1341     bool isNeg = u[j+n] < borrow;
1342     u[j+n] -= Lo_32(borrow);
1343 
1344     DEBUG_KNUTH(dbgs() << "KnuthDiv: after subtraction:");
1345     DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1346     DEBUG_KNUTH(dbgs() << '\n');
1347 
1348     // D5. [Test remainder.] Set q[j] = qp. If the result of step D4 was
1349     // negative, go to step D6; otherwise go on to step D7.
1350     q[j] = Lo_32(qp);
1351     if (isNeg) {
1352       // D6. [Add back]. The probability that this step is necessary is very
1353       // small, on the order of only 2/b. Make sure that test data accounts for
1354       // this possibility. Decrease q[j] by 1
1355       q[j]--;
1356       // and add (0v[n-1]...v[1]v[0]) to (u[j+n]u[j+n-1]...u[j+1]u[j]).
1357       // A carry will occur to the left of u[j+n], and it should be ignored
1358       // since it cancels with the borrow that occurred in D4.
1359       bool carry = false;
1360       for (unsigned i = 0; i < n; i++) {
1361         uint32_t limit = std::min(u[j+i],v[i]);
1362         u[j+i] += v[i] + carry;
1363         carry = u[j+i] < limit || (carry && u[j+i] == limit);
1364       }
1365       u[j+n] += carry;
1366     }
1367     DEBUG_KNUTH(dbgs() << "KnuthDiv: after correction:");
1368     DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1369     DEBUG_KNUTH(dbgs() << "\nKnuthDiv: digit result = " << q[j] << '\n');
1370 
1371     // D7. [Loop on j.]  Decrease j by one. Now if j >= 0, go back to D3.
1372   } while (--j >= 0);
1373 
1374   DEBUG_KNUTH(dbgs() << "KnuthDiv: quotient:");
1375   DEBUG_KNUTH(for (int i = m; i >= 0; i--) dbgs() << " " << q[i]);
1376   DEBUG_KNUTH(dbgs() << '\n');
1377 
1378   // D8. [Unnormalize]. Now q[...] is the desired quotient, and the desired
1379   // remainder may be obtained by dividing u[...] by d. If r is non-null we
1380   // compute the remainder (urem uses this).
1381   if (r) {
1382     // The value d is expressed by the "shift" value above since we avoided
1383     // multiplication by d by using a shift left. So, all we have to do is
1384     // shift right here.
1385     if (shift) {
1386       uint32_t carry = 0;
1387       DEBUG_KNUTH(dbgs() << "KnuthDiv: remainder:");
1388       for (int i = n-1; i >= 0; i--) {
1389         r[i] = (u[i] >> shift) | carry;
1390         carry = u[i] << (32 - shift);
1391         DEBUG_KNUTH(dbgs() << " " << r[i]);
1392       }
1393     } else {
1394       for (int i = n-1; i >= 0; i--) {
1395         r[i] = u[i];
1396         DEBUG_KNUTH(dbgs() << " " << r[i]);
1397       }
1398     }
1399     DEBUG_KNUTH(dbgs() << '\n');
1400   }
1401   DEBUG_KNUTH(dbgs() << '\n');
1402 }
1403 
1404 void APInt::divide(const WordType *LHS, unsigned lhsWords, const WordType *RHS,
1405                    unsigned rhsWords, WordType *Quotient, WordType *Remainder) {
1406   assert(lhsWords >= rhsWords && "Fractional result");
1407 
1408   // First, compose the values into an array of 32-bit words instead of
1409   // 64-bit words. This is a necessity of both the "short division" algorithm
1410   // and the Knuth "classical algorithm" which requires there to be native
1411   // operations for +, -, and * on an m bit value with an m*2 bit result. We
1412   // can't use 64-bit operands here because we don't have native results of
1413   // 128-bits. Furthermore, casting the 64-bit values to 32-bit values won't
1414   // work on large-endian machines.
1415   unsigned n = rhsWords * 2;
1416   unsigned m = (lhsWords * 2) - n;
1417 
1418   // Allocate space for the temporary values we need either on the stack, if
1419   // it will fit, or on the heap if it won't.
1420   uint32_t SPACE[128];
1421   uint32_t *U = nullptr;
1422   uint32_t *V = nullptr;
1423   uint32_t *Q = nullptr;
1424   uint32_t *R = nullptr;
1425   if ((Remainder?4:3)*n+2*m+1 <= 128) {
1426     U = &SPACE[0];
1427     V = &SPACE[m+n+1];
1428     Q = &SPACE[(m+n+1) + n];
1429     if (Remainder)
1430       R = &SPACE[(m+n+1) + n + (m+n)];
1431   } else {
1432     U = new uint32_t[m + n + 1];
1433     V = new uint32_t[n];
1434     Q = new uint32_t[m+n];
1435     if (Remainder)
1436       R = new uint32_t[n];
1437   }
1438 
1439   // Initialize the dividend
1440   memset(U, 0, (m+n+1)*sizeof(uint32_t));
1441   for (unsigned i = 0; i < lhsWords; ++i) {
1442     uint64_t tmp = LHS[i];
1443     U[i * 2] = Lo_32(tmp);
1444     U[i * 2 + 1] = Hi_32(tmp);
1445   }
1446   U[m+n] = 0; // this extra word is for "spill" in the Knuth algorithm.
1447 
1448   // Initialize the divisor
1449   memset(V, 0, (n)*sizeof(uint32_t));
1450   for (unsigned i = 0; i < rhsWords; ++i) {
1451     uint64_t tmp = RHS[i];
1452     V[i * 2] = Lo_32(tmp);
1453     V[i * 2 + 1] = Hi_32(tmp);
1454   }
1455 
1456   // initialize the quotient and remainder
1457   memset(Q, 0, (m+n) * sizeof(uint32_t));
1458   if (Remainder)
1459     memset(R, 0, n * sizeof(uint32_t));
1460 
1461   // Now, adjust m and n for the Knuth division. n is the number of words in
1462   // the divisor. m is the number of words by which the dividend exceeds the
1463   // divisor (i.e. m+n is the length of the dividend). These sizes must not
1464   // contain any zero words or the Knuth algorithm fails.
1465   for (unsigned i = n; i > 0 && V[i-1] == 0; i--) {
1466     n--;
1467     m++;
1468   }
1469   for (unsigned i = m+n; i > 0 && U[i-1] == 0; i--)
1470     m--;
1471 
1472   // If we're left with only a single word for the divisor, Knuth doesn't work
1473   // so we implement the short division algorithm here. This is much simpler
1474   // and faster because we are certain that we can divide a 64-bit quantity
1475   // by a 32-bit quantity at hardware speed and short division is simply a
1476   // series of such operations. This is just like doing short division but we
1477   // are using base 2^32 instead of base 10.
1478   assert(n != 0 && "Divide by zero?");
1479   if (n == 1) {
1480     uint32_t divisor = V[0];
1481     uint32_t remainder = 0;
1482     for (int i = m; i >= 0; i--) {
1483       uint64_t partial_dividend = Make_64(remainder, U[i]);
1484       if (partial_dividend == 0) {
1485         Q[i] = 0;
1486         remainder = 0;
1487       } else if (partial_dividend < divisor) {
1488         Q[i] = 0;
1489         remainder = Lo_32(partial_dividend);
1490       } else if (partial_dividend == divisor) {
1491         Q[i] = 1;
1492         remainder = 0;
1493       } else {
1494         Q[i] = Lo_32(partial_dividend / divisor);
1495         remainder = Lo_32(partial_dividend - (Q[i] * divisor));
1496       }
1497     }
1498     if (R)
1499       R[0] = remainder;
1500   } else {
1501     // Now we're ready to invoke the Knuth classical divide algorithm. In this
1502     // case n > 1.
1503     KnuthDiv(U, V, Q, R, m, n);
1504   }
1505 
1506   // If the caller wants the quotient
1507   if (Quotient) {
1508     for (unsigned i = 0; i < lhsWords; ++i)
1509       Quotient[i] = Make_64(Q[i*2+1], Q[i*2]);
1510   }
1511 
1512   // If the caller wants the remainder
1513   if (Remainder) {
1514     for (unsigned i = 0; i < rhsWords; ++i)
1515       Remainder[i] = Make_64(R[i*2+1], R[i*2]);
1516   }
1517 
1518   // Clean up the memory we allocated.
1519   if (U != &SPACE[0]) {
1520     delete [] U;
1521     delete [] V;
1522     delete [] Q;
1523     delete [] R;
1524   }
1525 }
1526 
1527 APInt APInt::udiv(const APInt &RHS) const {
1528   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
1529 
1530   // First, deal with the easy case
1531   if (isSingleWord()) {
1532     assert(RHS.U.VAL != 0 && "Divide by zero?");
1533     return APInt(BitWidth, U.VAL / RHS.U.VAL);
1534   }
1535 
1536   // Get some facts about the LHS and RHS number of bits and words
1537   unsigned lhsWords = getNumWords(getActiveBits());
1538   unsigned rhsBits  = RHS.getActiveBits();
1539   unsigned rhsWords = getNumWords(rhsBits);
1540   assert(rhsWords && "Divided by zero???");
1541 
1542   // Deal with some degenerate cases
1543   if (!lhsWords)
1544     // 0 / X ===> 0
1545     return APInt(BitWidth, 0);
1546   if (rhsBits == 1)
1547     // X / 1 ===> X
1548     return *this;
1549   if (lhsWords < rhsWords || this->ult(RHS))
1550     // X / Y ===> 0, iff X < Y
1551     return APInt(BitWidth, 0);
1552   if (*this == RHS)
1553     // X / X ===> 1
1554     return APInt(BitWidth, 1);
1555   if (lhsWords == 1) // rhsWords is 1 if lhsWords is 1.
1556     // All high words are zero, just use native divide
1557     return APInt(BitWidth, this->U.pVal[0] / RHS.U.pVal[0]);
1558 
1559   // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1560   APInt Quotient(BitWidth, 0); // to hold result.
1561   divide(U.pVal, lhsWords, RHS.U.pVal, rhsWords, Quotient.U.pVal, nullptr);
1562   return Quotient;
1563 }
1564 
1565 APInt APInt::udiv(uint64_t RHS) const {
1566   assert(RHS != 0 && "Divide by zero?");
1567 
1568   // First, deal with the easy case
1569   if (isSingleWord())
1570     return APInt(BitWidth, U.VAL / RHS);
1571 
1572   // Get some facts about the LHS words.
1573   unsigned lhsWords = getNumWords(getActiveBits());
1574 
1575   // Deal with some degenerate cases
1576   if (!lhsWords)
1577     // 0 / X ===> 0
1578     return APInt(BitWidth, 0);
1579   if (RHS == 1)
1580     // X / 1 ===> X
1581     return *this;
1582   if (this->ult(RHS))
1583     // X / Y ===> 0, iff X < Y
1584     return APInt(BitWidth, 0);
1585   if (*this == RHS)
1586     // X / X ===> 1
1587     return APInt(BitWidth, 1);
1588   if (lhsWords == 1) // rhsWords is 1 if lhsWords is 1.
1589     // All high words are zero, just use native divide
1590     return APInt(BitWidth, this->U.pVal[0] / RHS);
1591 
1592   // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1593   APInt Quotient(BitWidth, 0); // to hold result.
1594   divide(U.pVal, lhsWords, &RHS, 1, Quotient.U.pVal, nullptr);
1595   return Quotient;
1596 }
1597 
1598 APInt APInt::sdiv(const APInt &RHS) const {
1599   if (isNegative()) {
1600     if (RHS.isNegative())
1601       return (-(*this)).udiv(-RHS);
1602     return -((-(*this)).udiv(RHS));
1603   }
1604   if (RHS.isNegative())
1605     return -(this->udiv(-RHS));
1606   return this->udiv(RHS);
1607 }
1608 
1609 APInt APInt::sdiv(int64_t RHS) const {
1610   if (isNegative()) {
1611     if (RHS < 0)
1612       return (-(*this)).udiv(-RHS);
1613     return -((-(*this)).udiv(RHS));
1614   }
1615   if (RHS < 0)
1616     return -(this->udiv(-RHS));
1617   return this->udiv(RHS);
1618 }
1619 
1620 APInt APInt::urem(const APInt &RHS) const {
1621   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
1622   if (isSingleWord()) {
1623     assert(RHS.U.VAL != 0 && "Remainder by zero?");
1624     return APInt(BitWidth, U.VAL % RHS.U.VAL);
1625   }
1626 
1627   // Get some facts about the LHS
1628   unsigned lhsWords = getNumWords(getActiveBits());
1629 
1630   // Get some facts about the RHS
1631   unsigned rhsBits = RHS.getActiveBits();
1632   unsigned rhsWords = getNumWords(rhsBits);
1633   assert(rhsWords && "Performing remainder operation by zero ???");
1634 
1635   // Check the degenerate cases
1636   if (lhsWords == 0)
1637     // 0 % Y ===> 0
1638     return APInt(BitWidth, 0);
1639   if (rhsBits == 1)
1640     // X % 1 ===> 0
1641     return APInt(BitWidth, 0);
1642   if (lhsWords < rhsWords || this->ult(RHS))
1643     // X % Y ===> X, iff X < Y
1644     return *this;
1645   if (*this == RHS)
1646     // X % X == 0;
1647     return APInt(BitWidth, 0);
1648   if (lhsWords == 1)
1649     // All high words are zero, just use native remainder
1650     return APInt(BitWidth, U.pVal[0] % RHS.U.pVal[0]);
1651 
1652   // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1653   APInt Remainder(BitWidth, 0);
1654   divide(U.pVal, lhsWords, RHS.U.pVal, rhsWords, nullptr, Remainder.U.pVal);
1655   return Remainder;
1656 }
1657 
1658 uint64_t APInt::urem(uint64_t RHS) const {
1659   assert(RHS != 0 && "Remainder by zero?");
1660 
1661   if (isSingleWord())
1662     return U.VAL % RHS;
1663 
1664   // Get some facts about the LHS
1665   unsigned lhsWords = getNumWords(getActiveBits());
1666 
1667   // Check the degenerate cases
1668   if (lhsWords == 0)
1669     // 0 % Y ===> 0
1670     return 0;
1671   if (RHS == 1)
1672     // X % 1 ===> 0
1673     return 0;
1674   if (this->ult(RHS))
1675     // X % Y ===> X, iff X < Y
1676     return getZExtValue();
1677   if (*this == RHS)
1678     // X % X == 0;
1679     return 0;
1680   if (lhsWords == 1)
1681     // All high words are zero, just use native remainder
1682     return U.pVal[0] % RHS;
1683 
1684   // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1685   uint64_t Remainder;
1686   divide(U.pVal, lhsWords, &RHS, 1, nullptr, &Remainder);
1687   return Remainder;
1688 }
1689 
1690 APInt APInt::srem(const APInt &RHS) const {
1691   if (isNegative()) {
1692     if (RHS.isNegative())
1693       return -((-(*this)).urem(-RHS));
1694     return -((-(*this)).urem(RHS));
1695   }
1696   if (RHS.isNegative())
1697     return this->urem(-RHS);
1698   return this->urem(RHS);
1699 }
1700 
1701 int64_t APInt::srem(int64_t RHS) const {
1702   if (isNegative()) {
1703     if (RHS < 0)
1704       return -((-(*this)).urem(-RHS));
1705     return -((-(*this)).urem(RHS));
1706   }
1707   if (RHS < 0)
1708     return this->urem(-RHS);
1709   return this->urem(RHS);
1710 }
1711 
1712 void APInt::udivrem(const APInt &LHS, const APInt &RHS,
1713                     APInt &Quotient, APInt &Remainder) {
1714   assert(LHS.BitWidth == RHS.BitWidth && "Bit widths must be the same");
1715   unsigned BitWidth = LHS.BitWidth;
1716 
1717   // First, deal with the easy case
1718   if (LHS.isSingleWord()) {
1719     assert(RHS.U.VAL != 0 && "Divide by zero?");
1720     uint64_t QuotVal = LHS.U.VAL / RHS.U.VAL;
1721     uint64_t RemVal = LHS.U.VAL % RHS.U.VAL;
1722     Quotient = APInt(BitWidth, QuotVal);
1723     Remainder = APInt(BitWidth, RemVal);
1724     return;
1725   }
1726 
1727   // Get some size facts about the dividend and divisor
1728   unsigned lhsWords = getNumWords(LHS.getActiveBits());
1729   unsigned rhsBits  = RHS.getActiveBits();
1730   unsigned rhsWords = getNumWords(rhsBits);
1731   assert(rhsWords && "Performing divrem operation by zero ???");
1732 
1733   // Check the degenerate cases
1734   if (lhsWords == 0) {
1735     Quotient = APInt(BitWidth, 0);    // 0 / Y ===> 0
1736     Remainder = APInt(BitWidth, 0);   // 0 % Y ===> 0
1737     return;
1738   }
1739 
1740   if (rhsBits == 1) {
1741     Quotient = LHS;                   // X / 1 ===> X
1742     Remainder = APInt(BitWidth, 0);   // X % 1 ===> 0
1743   }
1744 
1745   if (lhsWords < rhsWords || LHS.ult(RHS)) {
1746     Remainder = LHS;                  // X % Y ===> X, iff X < Y
1747     Quotient = APInt(BitWidth, 0);    // X / Y ===> 0, iff X < Y
1748     return;
1749   }
1750 
1751   if (LHS == RHS) {
1752     Quotient  = APInt(BitWidth, 1);   // X / X ===> 1
1753     Remainder = APInt(BitWidth, 0);   // X % X ===> 0;
1754     return;
1755   }
1756 
1757   // Make sure there is enough space to hold the results.
1758   // NOTE: This assumes that reallocate won't affect any bits if it doesn't
1759   // change the size. This is necessary if Quotient or Remainder is aliased
1760   // with LHS or RHS.
1761   Quotient.reallocate(BitWidth);
1762   Remainder.reallocate(BitWidth);
1763 
1764   if (lhsWords == 1) { // rhsWords is 1 if lhsWords is 1.
1765     // There is only one word to consider so use the native versions.
1766     uint64_t lhsValue = LHS.U.pVal[0];
1767     uint64_t rhsValue = RHS.U.pVal[0];
1768     Quotient = lhsValue / rhsValue;
1769     Remainder = lhsValue % rhsValue;
1770     return;
1771   }
1772 
1773   // Okay, lets do it the long way
1774   divide(LHS.U.pVal, lhsWords, RHS.U.pVal, rhsWords, Quotient.U.pVal,
1775          Remainder.U.pVal);
1776   // Clear the rest of the Quotient and Remainder.
1777   std::memset(Quotient.U.pVal + lhsWords, 0,
1778               (getNumWords(BitWidth) - lhsWords) * APINT_WORD_SIZE);
1779   std::memset(Remainder.U.pVal + rhsWords, 0,
1780               (getNumWords(BitWidth) - rhsWords) * APINT_WORD_SIZE);
1781 }
1782 
1783 void APInt::udivrem(const APInt &LHS, uint64_t RHS, APInt &Quotient,
1784                     uint64_t &Remainder) {
1785   assert(RHS != 0 && "Divide by zero?");
1786   unsigned BitWidth = LHS.BitWidth;
1787 
1788   // First, deal with the easy case
1789   if (LHS.isSingleWord()) {
1790     uint64_t QuotVal = LHS.U.VAL / RHS;
1791     Remainder = LHS.U.VAL % RHS;
1792     Quotient = APInt(BitWidth, QuotVal);
1793     return;
1794   }
1795 
1796   // Get some size facts about the dividend and divisor
1797   unsigned lhsWords = getNumWords(LHS.getActiveBits());
1798 
1799   // Check the degenerate cases
1800   if (lhsWords == 0) {
1801     Quotient = APInt(BitWidth, 0);    // 0 / Y ===> 0
1802     Remainder = 0;                    // 0 % Y ===> 0
1803     return;
1804   }
1805 
1806   if (RHS == 1) {
1807     Quotient = LHS;                   // X / 1 ===> X
1808     Remainder = 0;                    // X % 1 ===> 0
1809     return;
1810   }
1811 
1812   if (LHS.ult(RHS)) {
1813     Remainder = LHS.getZExtValue();   // X % Y ===> X, iff X < Y
1814     Quotient = APInt(BitWidth, 0);    // X / Y ===> 0, iff X < Y
1815     return;
1816   }
1817 
1818   if (LHS == RHS) {
1819     Quotient  = APInt(BitWidth, 1);   // X / X ===> 1
1820     Remainder = 0;                    // X % X ===> 0;
1821     return;
1822   }
1823 
1824   // Make sure there is enough space to hold the results.
1825   // NOTE: This assumes that reallocate won't affect any bits if it doesn't
1826   // change the size. This is necessary if Quotient is aliased with LHS.
1827   Quotient.reallocate(BitWidth);
1828 
1829   if (lhsWords == 1) { // rhsWords is 1 if lhsWords is 1.
1830     // There is only one word to consider so use the native versions.
1831     uint64_t lhsValue = LHS.U.pVal[0];
1832     Quotient = lhsValue / RHS;
1833     Remainder = lhsValue % RHS;
1834     return;
1835   }
1836 
1837   // Okay, lets do it the long way
1838   divide(LHS.U.pVal, lhsWords, &RHS, 1, Quotient.U.pVal, &Remainder);
1839   // Clear the rest of the Quotient.
1840   std::memset(Quotient.U.pVal + lhsWords, 0,
1841               (getNumWords(BitWidth) - lhsWords) * APINT_WORD_SIZE);
1842 }
1843 
1844 void APInt::sdivrem(const APInt &LHS, const APInt &RHS,
1845                     APInt &Quotient, APInt &Remainder) {
1846   if (LHS.isNegative()) {
1847     if (RHS.isNegative())
1848       APInt::udivrem(-LHS, -RHS, Quotient, Remainder);
1849     else {
1850       APInt::udivrem(-LHS, RHS, Quotient, Remainder);
1851       Quotient.negate();
1852     }
1853     Remainder.negate();
1854   } else if (RHS.isNegative()) {
1855     APInt::udivrem(LHS, -RHS, Quotient, Remainder);
1856     Quotient.negate();
1857   } else {
1858     APInt::udivrem(LHS, RHS, Quotient, Remainder);
1859   }
1860 }
1861 
1862 void APInt::sdivrem(const APInt &LHS, int64_t RHS,
1863                     APInt &Quotient, int64_t &Remainder) {
1864   uint64_t R = Remainder;
1865   if (LHS.isNegative()) {
1866     if (RHS < 0)
1867       APInt::udivrem(-LHS, -RHS, Quotient, R);
1868     else {
1869       APInt::udivrem(-LHS, RHS, Quotient, R);
1870       Quotient.negate();
1871     }
1872     R = -R;
1873   } else if (RHS < 0) {
1874     APInt::udivrem(LHS, -RHS, Quotient, R);
1875     Quotient.negate();
1876   } else {
1877     APInt::udivrem(LHS, RHS, Quotient, R);
1878   }
1879   Remainder = R;
1880 }
1881 
1882 APInt APInt::sadd_ov(const APInt &RHS, bool &Overflow) const {
1883   APInt Res = *this+RHS;
1884   Overflow = isNonNegative() == RHS.isNonNegative() &&
1885              Res.isNonNegative() != isNonNegative();
1886   return Res;
1887 }
1888 
1889 APInt APInt::uadd_ov(const APInt &RHS, bool &Overflow) const {
1890   APInt Res = *this+RHS;
1891   Overflow = Res.ult(RHS);
1892   return Res;
1893 }
1894 
1895 APInt APInt::ssub_ov(const APInt &RHS, bool &Overflow) const {
1896   APInt Res = *this - RHS;
1897   Overflow = isNonNegative() != RHS.isNonNegative() &&
1898              Res.isNonNegative() != isNonNegative();
1899   return Res;
1900 }
1901 
1902 APInt APInt::usub_ov(const APInt &RHS, bool &Overflow) const {
1903   APInt Res = *this-RHS;
1904   Overflow = Res.ugt(*this);
1905   return Res;
1906 }
1907 
1908 APInt APInt::sdiv_ov(const APInt &RHS, bool &Overflow) const {
1909   // MININT/-1  -->  overflow.
1910   Overflow = isMinSignedValue() && RHS.isAllOnesValue();
1911   return sdiv(RHS);
1912 }
1913 
1914 APInt APInt::smul_ov(const APInt &RHS, bool &Overflow) const {
1915   APInt Res = *this * RHS;
1916 
1917   if (*this != 0 && RHS != 0)
1918     Overflow = Res.sdiv(RHS) != *this || Res.sdiv(*this) != RHS;
1919   else
1920     Overflow = false;
1921   return Res;
1922 }
1923 
1924 APInt APInt::umul_ov(const APInt &RHS, bool &Overflow) const {
1925   APInt Res = *this * RHS;
1926 
1927   if (*this != 0 && RHS != 0)
1928     Overflow = Res.udiv(RHS) != *this || Res.udiv(*this) != RHS;
1929   else
1930     Overflow = false;
1931   return Res;
1932 }
1933 
1934 APInt APInt::sshl_ov(const APInt &ShAmt, bool &Overflow) const {
1935   Overflow = ShAmt.uge(getBitWidth());
1936   if (Overflow)
1937     return APInt(BitWidth, 0);
1938 
1939   if (isNonNegative()) // Don't allow sign change.
1940     Overflow = ShAmt.uge(countLeadingZeros());
1941   else
1942     Overflow = ShAmt.uge(countLeadingOnes());
1943 
1944   return *this << ShAmt;
1945 }
1946 
1947 APInt APInt::ushl_ov(const APInt &ShAmt, bool &Overflow) const {
1948   Overflow = ShAmt.uge(getBitWidth());
1949   if (Overflow)
1950     return APInt(BitWidth, 0);
1951 
1952   Overflow = ShAmt.ugt(countLeadingZeros());
1953 
1954   return *this << ShAmt;
1955 }
1956 
1957 
1958 
1959 
1960 void APInt::fromString(unsigned numbits, StringRef str, uint8_t radix) {
1961   // Check our assumptions here
1962   assert(!str.empty() && "Invalid string length");
1963   assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 ||
1964           radix == 36) &&
1965          "Radix should be 2, 8, 10, 16, or 36!");
1966 
1967   StringRef::iterator p = str.begin();
1968   size_t slen = str.size();
1969   bool isNeg = *p == '-';
1970   if (*p == '-' || *p == '+') {
1971     p++;
1972     slen--;
1973     assert(slen && "String is only a sign, needs a value.");
1974   }
1975   assert((slen <= numbits || radix != 2) && "Insufficient bit width");
1976   assert(((slen-1)*3 <= numbits || radix != 8) && "Insufficient bit width");
1977   assert(((slen-1)*4 <= numbits || radix != 16) && "Insufficient bit width");
1978   assert((((slen-1)*64)/22 <= numbits || radix != 10) &&
1979          "Insufficient bit width");
1980 
1981   // Allocate memory if needed
1982   if (isSingleWord())
1983     U.VAL = 0;
1984   else
1985     U.pVal = getClearedMemory(getNumWords());
1986 
1987   // Figure out if we can shift instead of multiply
1988   unsigned shift = (radix == 16 ? 4 : radix == 8 ? 3 : radix == 2 ? 1 : 0);
1989 
1990   // Enter digit traversal loop
1991   for (StringRef::iterator e = str.end(); p != e; ++p) {
1992     unsigned digit = getDigit(*p, radix);
1993     assert(digit < radix && "Invalid character in digit string");
1994 
1995     // Shift or multiply the value by the radix
1996     if (slen > 1) {
1997       if (shift)
1998         *this <<= shift;
1999       else
2000         *this *= radix;
2001     }
2002 
2003     // Add in the digit we just interpreted
2004     *this += digit;
2005   }
2006   // If its negative, put it in two's complement form
2007   if (isNeg)
2008     this->negate();
2009 }
2010 
2011 void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix,
2012                      bool Signed, bool formatAsCLiteral) const {
2013   assert((Radix == 10 || Radix == 8 || Radix == 16 || Radix == 2 ||
2014           Radix == 36) &&
2015          "Radix should be 2, 8, 10, 16, or 36!");
2016 
2017   const char *Prefix = "";
2018   if (formatAsCLiteral) {
2019     switch (Radix) {
2020       case 2:
2021         // Binary literals are a non-standard extension added in gcc 4.3:
2022         // http://gcc.gnu.org/onlinedocs/gcc-4.3.0/gcc/Binary-constants.html
2023         Prefix = "0b";
2024         break;
2025       case 8:
2026         Prefix = "0";
2027         break;
2028       case 10:
2029         break; // No prefix
2030       case 16:
2031         Prefix = "0x";
2032         break;
2033       default:
2034         llvm_unreachable("Invalid radix!");
2035     }
2036   }
2037 
2038   // First, check for a zero value and just short circuit the logic below.
2039   if (*this == 0) {
2040     while (*Prefix) {
2041       Str.push_back(*Prefix);
2042       ++Prefix;
2043     };
2044     Str.push_back('0');
2045     return;
2046   }
2047 
2048   static const char Digits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
2049 
2050   if (isSingleWord()) {
2051     char Buffer[65];
2052     char *BufPtr = std::end(Buffer);
2053 
2054     uint64_t N;
2055     if (!Signed) {
2056       N = getZExtValue();
2057     } else {
2058       int64_t I = getSExtValue();
2059       if (I >= 0) {
2060         N = I;
2061       } else {
2062         Str.push_back('-');
2063         N = -(uint64_t)I;
2064       }
2065     }
2066 
2067     while (*Prefix) {
2068       Str.push_back(*Prefix);
2069       ++Prefix;
2070     };
2071 
2072     while (N) {
2073       *--BufPtr = Digits[N % Radix];
2074       N /= Radix;
2075     }
2076     Str.append(BufPtr, std::end(Buffer));
2077     return;
2078   }
2079 
2080   APInt Tmp(*this);
2081 
2082   if (Signed && isNegative()) {
2083     // They want to print the signed version and it is a negative value
2084     // Flip the bits and add one to turn it into the equivalent positive
2085     // value and put a '-' in the result.
2086     Tmp.negate();
2087     Str.push_back('-');
2088   }
2089 
2090   while (*Prefix) {
2091     Str.push_back(*Prefix);
2092     ++Prefix;
2093   };
2094 
2095   // We insert the digits backward, then reverse them to get the right order.
2096   unsigned StartDig = Str.size();
2097 
2098   // For the 2, 8 and 16 bit cases, we can just shift instead of divide
2099   // because the number of bits per digit (1, 3 and 4 respectively) divides
2100   // equally.  We just shift until the value is zero.
2101   if (Radix == 2 || Radix == 8 || Radix == 16) {
2102     // Just shift tmp right for each digit width until it becomes zero
2103     unsigned ShiftAmt = (Radix == 16 ? 4 : (Radix == 8 ? 3 : 1));
2104     unsigned MaskAmt = Radix - 1;
2105 
2106     while (Tmp.getBoolValue()) {
2107       unsigned Digit = unsigned(Tmp.getRawData()[0]) & MaskAmt;
2108       Str.push_back(Digits[Digit]);
2109       Tmp.lshrInPlace(ShiftAmt);
2110     }
2111   } else {
2112     while (Tmp.getBoolValue()) {
2113       uint64_t Digit;
2114       udivrem(Tmp, Radix, Tmp, Digit);
2115       assert(Digit < Radix && "divide failed");
2116       Str.push_back(Digits[Digit]);
2117     }
2118   }
2119 
2120   // Reverse the digits before returning.
2121   std::reverse(Str.begin()+StartDig, Str.end());
2122 }
2123 
2124 /// Returns the APInt as a std::string. Note that this is an inefficient method.
2125 /// It is better to pass in a SmallVector/SmallString to the methods above.
2126 std::string APInt::toString(unsigned Radix = 10, bool Signed = true) const {
2127   SmallString<40> S;
2128   toString(S, Radix, Signed, /* formatAsCLiteral = */false);
2129   return S.str();
2130 }
2131 
2132 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2133 LLVM_DUMP_METHOD void APInt::dump() const {
2134   SmallString<40> S, U;
2135   this->toStringUnsigned(U);
2136   this->toStringSigned(S);
2137   dbgs() << "APInt(" << BitWidth << "b, "
2138          << U << "u " << S << "s)\n";
2139 }
2140 #endif
2141 
2142 void APInt::print(raw_ostream &OS, bool isSigned) const {
2143   SmallString<40> S;
2144   this->toString(S, 10, isSigned, /* formatAsCLiteral = */false);
2145   OS << S;
2146 }
2147 
2148 // This implements a variety of operations on a representation of
2149 // arbitrary precision, two's-complement, bignum integer values.
2150 
2151 // Assumed by lowHalf, highHalf, partMSB and partLSB.  A fairly safe
2152 // and unrestricting assumption.
2153 static_assert(APInt::APINT_BITS_PER_WORD % 2 == 0,
2154               "Part width must be divisible by 2!");
2155 
2156 /* Some handy functions local to this file.  */
2157 
2158 /* Returns the integer part with the least significant BITS set.
2159    BITS cannot be zero.  */
2160 static inline APInt::WordType lowBitMask(unsigned bits) {
2161   assert(bits != 0 && bits <= APInt::APINT_BITS_PER_WORD);
2162 
2163   return ~(APInt::WordType) 0 >> (APInt::APINT_BITS_PER_WORD - bits);
2164 }
2165 
2166 /* Returns the value of the lower half of PART.  */
2167 static inline APInt::WordType lowHalf(APInt::WordType part) {
2168   return part & lowBitMask(APInt::APINT_BITS_PER_WORD / 2);
2169 }
2170 
2171 /* Returns the value of the upper half of PART.  */
2172 static inline APInt::WordType highHalf(APInt::WordType part) {
2173   return part >> (APInt::APINT_BITS_PER_WORD / 2);
2174 }
2175 
2176 /* Returns the bit number of the most significant set bit of a part.
2177    If the input number has no bits set -1U is returned.  */
2178 static unsigned partMSB(APInt::WordType value) {
2179   return findLastSet(value, ZB_Max);
2180 }
2181 
2182 /* Returns the bit number of the least significant set bit of a
2183    part.  If the input number has no bits set -1U is returned.  */
2184 static unsigned partLSB(APInt::WordType value) {
2185   return findFirstSet(value, ZB_Max);
2186 }
2187 
2188 /* Sets the least significant part of a bignum to the input value, and
2189    zeroes out higher parts.  */
2190 void APInt::tcSet(WordType *dst, WordType part, unsigned parts) {
2191   assert(parts > 0);
2192 
2193   dst[0] = part;
2194   for (unsigned i = 1; i < parts; i++)
2195     dst[i] = 0;
2196 }
2197 
2198 /* Assign one bignum to another.  */
2199 void APInt::tcAssign(WordType *dst, const WordType *src, unsigned parts) {
2200   for (unsigned i = 0; i < parts; i++)
2201     dst[i] = src[i];
2202 }
2203 
2204 /* Returns true if a bignum is zero, false otherwise.  */
2205 bool APInt::tcIsZero(const WordType *src, unsigned parts) {
2206   for (unsigned i = 0; i < parts; i++)
2207     if (src[i])
2208       return false;
2209 
2210   return true;
2211 }
2212 
2213 /* Extract the given bit of a bignum; returns 0 or 1.  */
2214 int APInt::tcExtractBit(const WordType *parts, unsigned bit) {
2215   return (parts[whichWord(bit)] & maskBit(bit)) != 0;
2216 }
2217 
2218 /* Set the given bit of a bignum. */
2219 void APInt::tcSetBit(WordType *parts, unsigned bit) {
2220   parts[whichWord(bit)] |= maskBit(bit);
2221 }
2222 
2223 /* Clears the given bit of a bignum. */
2224 void APInt::tcClearBit(WordType *parts, unsigned bit) {
2225   parts[whichWord(bit)] &= ~maskBit(bit);
2226 }
2227 
2228 /* Returns the bit number of the least significant set bit of a
2229    number.  If the input number has no bits set -1U is returned.  */
2230 unsigned APInt::tcLSB(const WordType *parts, unsigned n) {
2231   for (unsigned i = 0; i < n; i++) {
2232     if (parts[i] != 0) {
2233       unsigned lsb = partLSB(parts[i]);
2234 
2235       return lsb + i * APINT_BITS_PER_WORD;
2236     }
2237   }
2238 
2239   return -1U;
2240 }
2241 
2242 /* Returns the bit number of the most significant set bit of a number.
2243    If the input number has no bits set -1U is returned.  */
2244 unsigned APInt::tcMSB(const WordType *parts, unsigned n) {
2245   do {
2246     --n;
2247 
2248     if (parts[n] != 0) {
2249       unsigned msb = partMSB(parts[n]);
2250 
2251       return msb + n * APINT_BITS_PER_WORD;
2252     }
2253   } while (n);
2254 
2255   return -1U;
2256 }
2257 
2258 /* Copy the bit vector of width srcBITS from SRC, starting at bit
2259    srcLSB, to DST, of dstCOUNT parts, such that the bit srcLSB becomes
2260    the least significant bit of DST.  All high bits above srcBITS in
2261    DST are zero-filled.  */
2262 void
2263 APInt::tcExtract(WordType *dst, unsigned dstCount, const WordType *src,
2264                  unsigned srcBits, unsigned srcLSB) {
2265   unsigned dstParts = (srcBits + APINT_BITS_PER_WORD - 1) / APINT_BITS_PER_WORD;
2266   assert(dstParts <= dstCount);
2267 
2268   unsigned firstSrcPart = srcLSB / APINT_BITS_PER_WORD;
2269   tcAssign (dst, src + firstSrcPart, dstParts);
2270 
2271   unsigned shift = srcLSB % APINT_BITS_PER_WORD;
2272   tcShiftRight (dst, dstParts, shift);
2273 
2274   /* We now have (dstParts * APINT_BITS_PER_WORD - shift) bits from SRC
2275      in DST.  If this is less that srcBits, append the rest, else
2276      clear the high bits.  */
2277   unsigned n = dstParts * APINT_BITS_PER_WORD - shift;
2278   if (n < srcBits) {
2279     WordType mask = lowBitMask (srcBits - n);
2280     dst[dstParts - 1] |= ((src[firstSrcPart + dstParts] & mask)
2281                           << n % APINT_BITS_PER_WORD);
2282   } else if (n > srcBits) {
2283     if (srcBits % APINT_BITS_PER_WORD)
2284       dst[dstParts - 1] &= lowBitMask (srcBits % APINT_BITS_PER_WORD);
2285   }
2286 
2287   /* Clear high parts.  */
2288   while (dstParts < dstCount)
2289     dst[dstParts++] = 0;
2290 }
2291 
2292 /* DST += RHS + C where C is zero or one.  Returns the carry flag.  */
2293 APInt::WordType APInt::tcAdd(WordType *dst, const WordType *rhs,
2294                              WordType c, unsigned parts) {
2295   assert(c <= 1);
2296 
2297   for (unsigned i = 0; i < parts; i++) {
2298     WordType l = dst[i];
2299     if (c) {
2300       dst[i] += rhs[i] + 1;
2301       c = (dst[i] <= l);
2302     } else {
2303       dst[i] += rhs[i];
2304       c = (dst[i] < l);
2305     }
2306   }
2307 
2308   return c;
2309 }
2310 
2311 /// This function adds a single "word" integer, src, to the multiple
2312 /// "word" integer array, dst[]. dst[] is modified to reflect the addition and
2313 /// 1 is returned if there is a carry out, otherwise 0 is returned.
2314 /// @returns the carry of the addition.
2315 APInt::WordType APInt::tcAddPart(WordType *dst, WordType src,
2316                                  unsigned parts) {
2317   for (unsigned i = 0; i < parts; ++i) {
2318     dst[i] += src;
2319     if (dst[i] >= src)
2320       return 0; // No need to carry so exit early.
2321     src = 1; // Carry one to next digit.
2322   }
2323 
2324   return 1;
2325 }
2326 
2327 /* DST -= RHS + C where C is zero or one.  Returns the carry flag.  */
2328 APInt::WordType APInt::tcSubtract(WordType *dst, const WordType *rhs,
2329                                   WordType c, unsigned parts) {
2330   assert(c <= 1);
2331 
2332   for (unsigned i = 0; i < parts; i++) {
2333     WordType l = dst[i];
2334     if (c) {
2335       dst[i] -= rhs[i] + 1;
2336       c = (dst[i] >= l);
2337     } else {
2338       dst[i] -= rhs[i];
2339       c = (dst[i] > l);
2340     }
2341   }
2342 
2343   return c;
2344 }
2345 
2346 /// This function subtracts a single "word" (64-bit word), src, from
2347 /// the multi-word integer array, dst[], propagating the borrowed 1 value until
2348 /// no further borrowing is needed or it runs out of "words" in dst.  The result
2349 /// is 1 if "borrowing" exhausted the digits in dst, or 0 if dst was not
2350 /// exhausted. In other words, if src > dst then this function returns 1,
2351 /// otherwise 0.
2352 /// @returns the borrow out of the subtraction
2353 APInt::WordType APInt::tcSubtractPart(WordType *dst, WordType src,
2354                                       unsigned parts) {
2355   for (unsigned i = 0; i < parts; ++i) {
2356     WordType Dst = dst[i];
2357     dst[i] -= src;
2358     if (src <= Dst)
2359       return 0; // No need to borrow so exit early.
2360     src = 1; // We have to "borrow 1" from next "word"
2361   }
2362 
2363   return 1;
2364 }
2365 
2366 /* Negate a bignum in-place.  */
2367 void APInt::tcNegate(WordType *dst, unsigned parts) {
2368   tcComplement(dst, parts);
2369   tcIncrement(dst, parts);
2370 }
2371 
2372 /*  DST += SRC * MULTIPLIER + CARRY   if add is true
2373     DST  = SRC * MULTIPLIER + CARRY   if add is false
2374 
2375     Requires 0 <= DSTPARTS <= SRCPARTS + 1.  If DST overlaps SRC
2376     they must start at the same point, i.e. DST == SRC.
2377 
2378     If DSTPARTS == SRCPARTS + 1 no overflow occurs and zero is
2379     returned.  Otherwise DST is filled with the least significant
2380     DSTPARTS parts of the result, and if all of the omitted higher
2381     parts were zero return zero, otherwise overflow occurred and
2382     return one.  */
2383 int APInt::tcMultiplyPart(WordType *dst, const WordType *src,
2384                           WordType multiplier, WordType carry,
2385                           unsigned srcParts, unsigned dstParts,
2386                           bool add) {
2387   /* Otherwise our writes of DST kill our later reads of SRC.  */
2388   assert(dst <= src || dst >= src + srcParts);
2389   assert(dstParts <= srcParts + 1);
2390 
2391   /* N loops; minimum of dstParts and srcParts.  */
2392   unsigned n = std::min(dstParts, srcParts);
2393 
2394   for (unsigned i = 0; i < n; i++) {
2395     WordType low, mid, high, srcPart;
2396 
2397       /* [ LOW, HIGH ] = MULTIPLIER * SRC[i] + DST[i] + CARRY.
2398 
2399          This cannot overflow, because
2400 
2401          (n - 1) * (n - 1) + 2 (n - 1) = (n - 1) * (n + 1)
2402 
2403          which is less than n^2.  */
2404 
2405     srcPart = src[i];
2406 
2407     if (multiplier == 0 || srcPart == 0) {
2408       low = carry;
2409       high = 0;
2410     } else {
2411       low = lowHalf(srcPart) * lowHalf(multiplier);
2412       high = highHalf(srcPart) * highHalf(multiplier);
2413 
2414       mid = lowHalf(srcPart) * highHalf(multiplier);
2415       high += highHalf(mid);
2416       mid <<= APINT_BITS_PER_WORD / 2;
2417       if (low + mid < low)
2418         high++;
2419       low += mid;
2420 
2421       mid = highHalf(srcPart) * lowHalf(multiplier);
2422       high += highHalf(mid);
2423       mid <<= APINT_BITS_PER_WORD / 2;
2424       if (low + mid < low)
2425         high++;
2426       low += mid;
2427 
2428       /* Now add carry.  */
2429       if (low + carry < low)
2430         high++;
2431       low += carry;
2432     }
2433 
2434     if (add) {
2435       /* And now DST[i], and store the new low part there.  */
2436       if (low + dst[i] < low)
2437         high++;
2438       dst[i] += low;
2439     } else
2440       dst[i] = low;
2441 
2442     carry = high;
2443   }
2444 
2445   if (srcParts < dstParts) {
2446     /* Full multiplication, there is no overflow.  */
2447     assert(srcParts + 1 == dstParts);
2448     dst[srcParts] = carry;
2449     return 0;
2450   }
2451 
2452   /* We overflowed if there is carry.  */
2453   if (carry)
2454     return 1;
2455 
2456   /* We would overflow if any significant unwritten parts would be
2457      non-zero.  This is true if any remaining src parts are non-zero
2458      and the multiplier is non-zero.  */
2459   if (multiplier)
2460     for (unsigned i = dstParts; i < srcParts; i++)
2461       if (src[i])
2462         return 1;
2463 
2464   /* We fitted in the narrow destination.  */
2465   return 0;
2466 }
2467 
2468 /* DST = LHS * RHS, where DST has the same width as the operands and
2469    is filled with the least significant parts of the result.  Returns
2470    one if overflow occurred, otherwise zero.  DST must be disjoint
2471    from both operands.  */
2472 int APInt::tcMultiply(WordType *dst, const WordType *lhs,
2473                       const WordType *rhs, unsigned parts) {
2474   assert(dst != lhs && dst != rhs);
2475 
2476   int overflow = 0;
2477   tcSet(dst, 0, parts);
2478 
2479   for (unsigned i = 0; i < parts; i++)
2480     overflow |= tcMultiplyPart(&dst[i], lhs, rhs[i], 0, parts,
2481                                parts - i, true);
2482 
2483   return overflow;
2484 }
2485 
2486 /// DST = LHS * RHS, where DST has width the sum of the widths of the
2487 /// operands. No overflow occurs. DST must be disjoint from both operands.
2488 void APInt::tcFullMultiply(WordType *dst, const WordType *lhs,
2489                            const WordType *rhs, unsigned lhsParts,
2490                            unsigned rhsParts) {
2491   /* Put the narrower number on the LHS for less loops below.  */
2492   if (lhsParts > rhsParts)
2493     return tcFullMultiply (dst, rhs, lhs, rhsParts, lhsParts);
2494 
2495   assert(dst != lhs && dst != rhs);
2496 
2497   tcSet(dst, 0, rhsParts);
2498 
2499   for (unsigned i = 0; i < lhsParts; i++)
2500     tcMultiplyPart(&dst[i], rhs, lhs[i], 0, rhsParts, rhsParts + 1, true);
2501 }
2502 
2503 /* If RHS is zero LHS and REMAINDER are left unchanged, return one.
2504    Otherwise set LHS to LHS / RHS with the fractional part discarded,
2505    set REMAINDER to the remainder, return zero.  i.e.
2506 
2507    OLD_LHS = RHS * LHS + REMAINDER
2508 
2509    SCRATCH is a bignum of the same size as the operands and result for
2510    use by the routine; its contents need not be initialized and are
2511    destroyed.  LHS, REMAINDER and SCRATCH must be distinct.
2512 */
2513 int APInt::tcDivide(WordType *lhs, const WordType *rhs,
2514                     WordType *remainder, WordType *srhs,
2515                     unsigned parts) {
2516   assert(lhs != remainder && lhs != srhs && remainder != srhs);
2517 
2518   unsigned shiftCount = tcMSB(rhs, parts) + 1;
2519   if (shiftCount == 0)
2520     return true;
2521 
2522   shiftCount = parts * APINT_BITS_PER_WORD - shiftCount;
2523   unsigned n = shiftCount / APINT_BITS_PER_WORD;
2524   WordType mask = (WordType) 1 << (shiftCount % APINT_BITS_PER_WORD);
2525 
2526   tcAssign(srhs, rhs, parts);
2527   tcShiftLeft(srhs, parts, shiftCount);
2528   tcAssign(remainder, lhs, parts);
2529   tcSet(lhs, 0, parts);
2530 
2531   /* Loop, subtracting SRHS if REMAINDER is greater and adding that to
2532      the total.  */
2533   for (;;) {
2534     int compare = tcCompare(remainder, srhs, parts);
2535     if (compare >= 0) {
2536       tcSubtract(remainder, srhs, 0, parts);
2537       lhs[n] |= mask;
2538     }
2539 
2540     if (shiftCount == 0)
2541       break;
2542     shiftCount--;
2543     tcShiftRight(srhs, parts, 1);
2544     if ((mask >>= 1) == 0) {
2545       mask = (WordType) 1 << (APINT_BITS_PER_WORD - 1);
2546       n--;
2547     }
2548   }
2549 
2550   return false;
2551 }
2552 
2553 /// Shift a bignum left Cound bits in-place. Shifted in bits are zero. There are
2554 /// no restrictions on Count.
2555 void APInt::tcShiftLeft(WordType *Dst, unsigned Words, unsigned Count) {
2556   // Don't bother performing a no-op shift.
2557   if (!Count)
2558     return;
2559 
2560   // WordShift is the inter-part shift; BitShift is the intra-part shift.
2561   unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words);
2562   unsigned BitShift = Count % APINT_BITS_PER_WORD;
2563 
2564   // Fastpath for moving by whole words.
2565   if (BitShift == 0) {
2566     std::memmove(Dst + WordShift, Dst, (Words - WordShift) * APINT_WORD_SIZE);
2567   } else {
2568     while (Words-- > WordShift) {
2569       Dst[Words] = Dst[Words - WordShift] << BitShift;
2570       if (Words > WordShift)
2571         Dst[Words] |=
2572           Dst[Words - WordShift - 1] >> (APINT_BITS_PER_WORD - BitShift);
2573     }
2574   }
2575 
2576   // Fill in the remainder with 0s.
2577   std::memset(Dst, 0, WordShift * APINT_WORD_SIZE);
2578 }
2579 
2580 /// Shift a bignum right Count bits in-place. Shifted in bits are zero. There
2581 /// are no restrictions on Count.
2582 void APInt::tcShiftRight(WordType *Dst, unsigned Words, unsigned Count) {
2583   // Don't bother performing a no-op shift.
2584   if (!Count)
2585     return;
2586 
2587   // WordShift is the inter-part shift; BitShift is the intra-part shift.
2588   unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words);
2589   unsigned BitShift = Count % APINT_BITS_PER_WORD;
2590 
2591   unsigned WordsToMove = Words - WordShift;
2592   // Fastpath for moving by whole words.
2593   if (BitShift == 0) {
2594     std::memmove(Dst, Dst + WordShift, WordsToMove * APINT_WORD_SIZE);
2595   } else {
2596     for (unsigned i = 0; i != WordsToMove; ++i) {
2597       Dst[i] = Dst[i + WordShift] >> BitShift;
2598       if (i + 1 != WordsToMove)
2599         Dst[i] |= Dst[i + WordShift + 1] << (APINT_BITS_PER_WORD - BitShift);
2600     }
2601   }
2602 
2603   // Fill in the remainder with 0s.
2604   std::memset(Dst + WordsToMove, 0, WordShift * APINT_WORD_SIZE);
2605 }
2606 
2607 /* Bitwise and of two bignums.  */
2608 void APInt::tcAnd(WordType *dst, const WordType *rhs, unsigned parts) {
2609   for (unsigned i = 0; i < parts; i++)
2610     dst[i] &= rhs[i];
2611 }
2612 
2613 /* Bitwise inclusive or of two bignums.  */
2614 void APInt::tcOr(WordType *dst, const WordType *rhs, unsigned parts) {
2615   for (unsigned i = 0; i < parts; i++)
2616     dst[i] |= rhs[i];
2617 }
2618 
2619 /* Bitwise exclusive or of two bignums.  */
2620 void APInt::tcXor(WordType *dst, const WordType *rhs, unsigned parts) {
2621   for (unsigned i = 0; i < parts; i++)
2622     dst[i] ^= rhs[i];
2623 }
2624 
2625 /* Complement a bignum in-place.  */
2626 void APInt::tcComplement(WordType *dst, unsigned parts) {
2627   for (unsigned i = 0; i < parts; i++)
2628     dst[i] = ~dst[i];
2629 }
2630 
2631 /* Comparison (unsigned) of two bignums.  */
2632 int APInt::tcCompare(const WordType *lhs, const WordType *rhs,
2633                      unsigned parts) {
2634   while (parts) {
2635     parts--;
2636     if (lhs[parts] != rhs[parts])
2637       return (lhs[parts] > rhs[parts]) ? 1 : -1;
2638   }
2639 
2640   return 0;
2641 }
2642 
2643 /* Set the least significant BITS bits of a bignum, clear the
2644    rest.  */
2645 void APInt::tcSetLeastSignificantBits(WordType *dst, unsigned parts,
2646                                       unsigned bits) {
2647   unsigned i = 0;
2648   while (bits > APINT_BITS_PER_WORD) {
2649     dst[i++] = ~(WordType) 0;
2650     bits -= APINT_BITS_PER_WORD;
2651   }
2652 
2653   if (bits)
2654     dst[i++] = ~(WordType) 0 >> (APINT_BITS_PER_WORD - bits);
2655 
2656   while (i < parts)
2657     dst[i++] = 0;
2658 }
2659 
2660 APInt llvm::APIntOps::RoundingUDiv(const APInt &A, const APInt &B,
2661                                    APInt::Rounding RM) {
2662   // Currently udivrem always rounds down.
2663   switch (RM) {
2664   case APInt::Rounding::DOWN:
2665   case APInt::Rounding::TOWARD_ZERO:
2666     return A.udiv(B);
2667   case APInt::Rounding::UP: {
2668     APInt Quo, Rem;
2669     APInt::udivrem(A, B, Quo, Rem);
2670     if (Rem == 0)
2671       return Quo;
2672     return Quo + 1;
2673   }
2674   }
2675   llvm_unreachable("Unknown APInt::Rounding enum");
2676 }
2677 
2678 APInt llvm::APIntOps::RoundingSDiv(const APInt &A, const APInt &B,
2679                                    APInt::Rounding RM) {
2680   switch (RM) {
2681   case APInt::Rounding::DOWN:
2682   case APInt::Rounding::UP: {
2683     APInt Quo, Rem;
2684     APInt::sdivrem(A, B, Quo, Rem);
2685     if (Rem == 0)
2686       return Quo;
2687     // This algorithm deals with arbitrary rounding mode used by sdivrem.
2688     // We want to check whether the non-integer part of the mathematical value
2689     // is negative or not. If the non-integer part is negative, we need to round
2690     // down from Quo; otherwise, if it's positive or 0, we return Quo, as it's
2691     // already rounded down.
2692     if (RM == APInt::Rounding::DOWN) {
2693       if (Rem.isNegative() != B.isNegative())
2694         return Quo - 1;
2695       return Quo;
2696     }
2697     if (Rem.isNegative() != B.isNegative())
2698       return Quo;
2699     return Quo + 1;
2700   }
2701   // Currently sdiv rounds twards zero.
2702   case APInt::Rounding::TOWARD_ZERO:
2703     return A.sdiv(B);
2704   }
2705   llvm_unreachable("Unknown APInt::Rounding enum");
2706 }
2707 
2708 Optional<APInt>
2709 llvm::APIntOps::SolveQuadraticEquationWrap(APInt A, APInt B, APInt C,
2710                                            unsigned RangeWidth) {
2711   unsigned CoeffWidth = A.getBitWidth();
2712   assert(CoeffWidth == B.getBitWidth() && CoeffWidth == C.getBitWidth());
2713   assert(RangeWidth <= CoeffWidth &&
2714          "Value range width should be less than coefficient width");
2715   assert(RangeWidth > 1 && "Value range bit width should be > 1");
2716 
2717   LLVM_DEBUG(dbgs() << __func__ << ": solving " << A << "x^2 + " << B
2718                     << "x + " << C << ", rw:" << RangeWidth << '\n');
2719 
2720   // Identify 0 as a (non)solution immediately.
2721   if (C.sextOrTrunc(RangeWidth).isNullValue() ) {
2722     LLVM_DEBUG(dbgs() << __func__ << ": zero solution\n");
2723     return APInt(CoeffWidth, 0);
2724   }
2725 
2726   // The result of APInt arithmetic has the same bit width as the operands,
2727   // so it can actually lose high bits. A product of two n-bit integers needs
2728   // 2n-1 bits to represent the full value.
2729   // The operation done below (on quadratic coefficients) that can produce
2730   // the largest value is the evaluation of the equation during bisection,
2731   // which needs 3 times the bitwidth of the coefficient, so the total number
2732   // of required bits is 3n.
2733   //
2734   // The purpose of this extension is to simulate the set Z of all integers,
2735   // where n+1 > n for all n in Z. In Z it makes sense to talk about positive
2736   // and negative numbers (not so much in a modulo arithmetic). The method
2737   // used to solve the equation is based on the standard formula for real
2738   // numbers, and uses the concepts of "positive" and "negative" with their
2739   // usual meanings.
2740   CoeffWidth *= 3;
2741   A = A.sext(CoeffWidth);
2742   B = B.sext(CoeffWidth);
2743   C = C.sext(CoeffWidth);
2744 
2745   // Make A > 0 for simplicity. Negate cannot overflow at this point because
2746   // the bit width has increased.
2747   if (A.isNegative()) {
2748     A.negate();
2749     B.negate();
2750     C.negate();
2751   }
2752 
2753   // Solving an equation q(x) = 0 with coefficients in modular arithmetic
2754   // is really solving a set of equations q(x) = kR for k = 0, 1, 2, ...,
2755   // and R = 2^BitWidth.
2756   // Since we're trying not only to find exact solutions, but also values
2757   // that "wrap around", such a set will always have a solution, i.e. an x
2758   // that satisfies at least one of the equations, or such that |q(x)|
2759   // exceeds kR, while |q(x-1)| for the same k does not.
2760   //
2761   // We need to find a value k, such that Ax^2 + Bx + C = kR will have a
2762   // positive solution n (in the above sense), and also such that the n
2763   // will be the least among all solutions corresponding to k = 0, 1, ...
2764   // (more precisely, the least element in the set
2765   //   { n(k) | k is such that a solution n(k) exists }).
2766   //
2767   // Consider the parabola (over real numbers) that corresponds to the
2768   // quadratic equation. Since A > 0, the arms of the parabola will point
2769   // up. Picking different values of k will shift it up and down by R.
2770   //
2771   // We want to shift the parabola in such a way as to reduce the problem
2772   // of solving q(x) = kR to solving shifted_q(x) = 0.
2773   // (The interesting solutions are the ceilings of the real number
2774   // solutions.)
2775   APInt R = APInt::getOneBitSet(CoeffWidth, RangeWidth);
2776   APInt TwoA = 2 * A;
2777   APInt SqrB = B * B;
2778   bool PickLow;
2779 
2780   auto RoundUp = [] (const APInt &V, const APInt &A) -> APInt {
2781     assert(A.isStrictlyPositive());
2782     APInt T = V.abs().urem(A);
2783     if (T.isNullValue())
2784       return V;
2785     return V.isNegative() ? V+T : V+(A-T);
2786   };
2787 
2788   // The vertex of the parabola is at -B/2A, but since A > 0, it's negative
2789   // iff B is positive.
2790   if (B.isNonNegative()) {
2791     // If B >= 0, the vertex it at a negative location (or at 0), so in
2792     // order to have a non-negative solution we need to pick k that makes
2793     // C-kR negative. To satisfy all the requirements for the solution
2794     // that we are looking for, it needs to be closest to 0 of all k.
2795     C = C.srem(R);
2796     if (C.isStrictlyPositive())
2797       C -= R;
2798     // Pick the greater solution.
2799     PickLow = false;
2800   } else {
2801     // If B < 0, the vertex is at a positive location. For any solution
2802     // to exist, the discriminant must be non-negative. This means that
2803     // C-kR <= B^2/4A is a necessary condition for k, i.e. there is a
2804     // lower bound on values of k: kR >= C - B^2/4A.
2805     APInt LowkR = C - SqrB.udiv(2*TwoA); // udiv because all values > 0.
2806     // Round LowkR up (towards +inf) to the nearest kR.
2807     LowkR = RoundUp(LowkR, R);
2808 
2809     // If there exists k meeting the condition above, and such that
2810     // C-kR > 0, there will be two positive real number solutions of
2811     // q(x) = kR. Out of all such values of k, pick the one that makes
2812     // C-kR closest to 0, (i.e. pick maximum k such that C-kR > 0).
2813     // In other words, find maximum k such that LowkR <= kR < C.
2814     if (C.sgt(LowkR)) {
2815       // If LowkR < C, then such a k is guaranteed to exist because
2816       // LowkR itself is a multiple of R.
2817       C -= -RoundUp(-C, R);      // C = C - RoundDown(C, R)
2818       // Pick the smaller solution.
2819       PickLow = true;
2820     } else {
2821       // If C-kR < 0 for all potential k's, it means that one solution
2822       // will be negative, while the other will be positive. The positive
2823       // solution will shift towards 0 if the parabola is moved up.
2824       // Pick the kR closest to the lower bound (i.e. make C-kR closest
2825       // to 0, or in other words, out of all parabolas that have solutions,
2826       // pick the one that is the farthest "up").
2827       // Since LowkR is itself a multiple of R, simply take C-LowkR.
2828       C -= LowkR;
2829       // Pick the greater solution.
2830       PickLow = false;
2831     }
2832   }
2833 
2834   LLVM_DEBUG(dbgs() << __func__ << ": updated coefficients " << A << "x^2 + "
2835                     << B << "x + " << C << ", rw:" << RangeWidth << '\n');
2836 
2837   APInt D = SqrB - 4*A*C;
2838   assert(D.isNonNegative() && "Negative discriminant");
2839   APInt SQ = D.sqrt();
2840 
2841   APInt Q = SQ * SQ;
2842   bool InexactSQ = Q != D;
2843   // The calculated SQ may actually be greater than the exact (non-integer)
2844   // value. If that's the case, decremement SQ to get a value that is lower.
2845   if (Q.sgt(D))
2846     SQ -= 1;
2847 
2848   APInt X;
2849   APInt Rem;
2850 
2851   // SQ is rounded down (i.e SQ * SQ <= D), so the roots may be inexact.
2852   // When using the quadratic formula directly, the calculated low root
2853   // may be greater than the exact one, since we would be subtracting SQ.
2854   // To make sure that the calculated root is not greater than the exact
2855   // one, subtract SQ+1 when calculating the low root (for inexact value
2856   // of SQ).
2857   if (PickLow)
2858     APInt::sdivrem(-B - (SQ+InexactSQ), TwoA, X, Rem);
2859   else
2860     APInt::sdivrem(-B + SQ, TwoA, X, Rem);
2861 
2862   // The updated coefficients should be such that the (exact) solution is
2863   // positive. Since APInt division rounds towards 0, the calculated one
2864   // can be 0, but cannot be negative.
2865   assert(X.isNonNegative() && "Solution should be non-negative");
2866 
2867   if (!InexactSQ && Rem.isNullValue()) {
2868     LLVM_DEBUG(dbgs() << __func__ << ": solution (root): " << X << '\n');
2869     return X;
2870   }
2871 
2872   assert((SQ*SQ).sle(D) && "SQ = |_sqrt(D)_|, so SQ*SQ <= D");
2873   // The exact value of the square root of D should be between SQ and SQ+1.
2874   // This implies that the solution should be between that corresponding to
2875   // SQ (i.e. X) and that corresponding to SQ+1.
2876   //
2877   // The calculated X cannot be greater than the exact (real) solution.
2878   // Actually it must be strictly less than the exact solution, while
2879   // X+1 will be greater than or equal to it.
2880 
2881   APInt VX = (A*X + B)*X + C;
2882   APInt VY = VX + TwoA*X + A + B;
2883   bool SignChange = VX.isNegative() != VY.isNegative() ||
2884                     VX.isNullValue() != VY.isNullValue();
2885   // If the sign did not change between X and X+1, X is not a valid solution.
2886   // This could happen when the actual (exact) roots don't have an integer
2887   // between them, so they would both be contained between X and X+1.
2888   if (!SignChange) {
2889     LLVM_DEBUG(dbgs() << __func__ << ": no valid solution\n");
2890     return None;
2891   }
2892 
2893   X += 1;
2894   LLVM_DEBUG(dbgs() << __func__ << ": solution (wrap): " << X << '\n');
2895   return X;
2896 }
2897