1 //===-- APInt.cpp - Implement APInt class ---------------------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements a class to represent arbitrary precision integer
11 // constant values and provide a variety of arithmetic operations on them.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "llvm/ADT/APInt.h"
16 #include "llvm/ADT/ArrayRef.h"
17 #include "llvm/ADT/FoldingSet.h"
18 #include "llvm/ADT/Hashing.h"
19 #include "llvm/ADT/SmallString.h"
20 #include "llvm/ADT/StringRef.h"
21 #include "llvm/Support/Debug.h"
22 #include "llvm/Support/ErrorHandling.h"
23 #include "llvm/Support/MathExtras.h"
24 #include "llvm/Support/raw_ostream.h"
25 #include <climits>
26 #include <cmath>
27 #include <cstdlib>
28 #include <cstring>
29 using namespace llvm;
30 
31 #define DEBUG_TYPE "apint"
32 
33 /// A utility function for allocating memory, checking for allocation failures,
34 /// and ensuring the contents are zeroed.
35 inline static uint64_t* getClearedMemory(unsigned numWords) {
36   uint64_t * result = new uint64_t[numWords];
37   assert(result && "APInt memory allocation fails!");
38   memset(result, 0, numWords * sizeof(uint64_t));
39   return result;
40 }
41 
42 /// A utility function for allocating memory and checking for allocation
43 /// failure.  The content is not zeroed.
44 inline static uint64_t* getMemory(unsigned numWords) {
45   uint64_t * result = new uint64_t[numWords];
46   assert(result && "APInt memory allocation fails!");
47   return result;
48 }
49 
50 /// A utility function that converts a character to a digit.
51 inline static unsigned getDigit(char cdigit, uint8_t radix) {
52   unsigned r;
53 
54   if (radix == 16 || radix == 36) {
55     r = cdigit - '0';
56     if (r <= 9)
57       return r;
58 
59     r = cdigit - 'A';
60     if (r <= radix - 11U)
61       return r + 10;
62 
63     r = cdigit - 'a';
64     if (r <= radix - 11U)
65       return r + 10;
66 
67     radix = 10;
68   }
69 
70   r = cdigit - '0';
71   if (r < radix)
72     return r;
73 
74   return -1U;
75 }
76 
77 
78 void APInt::initSlowCase(uint64_t val, bool isSigned) {
79   VAL = 0;
80   pVal = getClearedMemory(getNumWords());
81   pVal[0] = val;
82   if (isSigned && int64_t(val) < 0)
83     for (unsigned i = 1; i < getNumWords(); ++i)
84       pVal[i] = -1ULL;
85   clearUnusedBits();
86 }
87 
88 void APInt::initSlowCase(const APInt& that) {
89   VAL = 0;
90   pVal = getMemory(getNumWords());
91   memcpy(pVal, that.pVal, getNumWords() * APINT_WORD_SIZE);
92 }
93 
94 void APInt::initFromArray(ArrayRef<uint64_t> bigVal) {
95   assert(BitWidth && "Bitwidth too small");
96   assert(bigVal.data() && "Null pointer detected!");
97   if (isSingleWord())
98     VAL = bigVal[0];
99   else {
100     // Get memory, cleared to 0
101     VAL = 0;
102     pVal = getClearedMemory(getNumWords());
103     // Calculate the number of words to copy
104     unsigned words = std::min<unsigned>(bigVal.size(), getNumWords());
105     // Copy the words from bigVal to pVal
106     memcpy(pVal, bigVal.data(), words * APINT_WORD_SIZE);
107   }
108   // Make sure unused high bits are cleared
109   clearUnusedBits();
110 }
111 
112 APInt::APInt(unsigned numBits, ArrayRef<uint64_t> bigVal)
113   : BitWidth(numBits) {
114   initFromArray(bigVal);
115 }
116 
117 APInt::APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[])
118   : BitWidth(numBits) {
119   initFromArray(makeArrayRef(bigVal, numWords));
120 }
121 
122 APInt::APInt(unsigned numbits, StringRef Str, uint8_t radix)
123   : VAL(0), BitWidth(numbits) {
124   assert(BitWidth && "Bitwidth too small");
125   fromString(numbits, Str, radix);
126 }
127 
128 void APInt::AssignSlowCase(const APInt& RHS) {
129   // Don't do anything for X = X
130   if (this == &RHS)
131     return;
132 
133   if (BitWidth == RHS.getBitWidth()) {
134     // assume same bit-width single-word case is already handled
135     assert(!isSingleWord());
136     memcpy(pVal, RHS.pVal, getNumWords() * APINT_WORD_SIZE);
137     return;
138   }
139 
140   if (isSingleWord()) {
141     // assume case where both are single words is already handled
142     assert(!RHS.isSingleWord());
143     VAL = 0;
144     pVal = getMemory(RHS.getNumWords());
145     memcpy(pVal, RHS.pVal, RHS.getNumWords() * APINT_WORD_SIZE);
146   } else if (getNumWords() == RHS.getNumWords())
147     memcpy(pVal, RHS.pVal, RHS.getNumWords() * APINT_WORD_SIZE);
148   else if (RHS.isSingleWord()) {
149     delete [] pVal;
150     VAL = RHS.VAL;
151   } else {
152     delete [] pVal;
153     pVal = getMemory(RHS.getNumWords());
154     memcpy(pVal, RHS.pVal, RHS.getNumWords() * APINT_WORD_SIZE);
155   }
156   BitWidth = RHS.BitWidth;
157   clearUnusedBits();
158 }
159 
160 /// This method 'profiles' an APInt for use with FoldingSet.
161 void APInt::Profile(FoldingSetNodeID& ID) const {
162   ID.AddInteger(BitWidth);
163 
164   if (isSingleWord()) {
165     ID.AddInteger(VAL);
166     return;
167   }
168 
169   unsigned NumWords = getNumWords();
170   for (unsigned i = 0; i < NumWords; ++i)
171     ID.AddInteger(pVal[i]);
172 }
173 
174 /// @brief Prefix increment operator. Increments the APInt by one.
175 APInt& APInt::operator++() {
176   if (isSingleWord())
177     ++VAL;
178   else
179     tcIncrement(pVal, getNumWords());
180   return clearUnusedBits();
181 }
182 
183 /// @brief Prefix decrement operator. Decrements the APInt by one.
184 APInt& APInt::operator--() {
185   if (isSingleWord())
186     --VAL;
187   else
188     tcDecrement(pVal, getNumWords());
189   return clearUnusedBits();
190 }
191 
192 /// Adds the RHS APint to this APInt.
193 /// @returns this, after addition of RHS.
194 /// @brief Addition assignment operator.
195 APInt& APInt::operator+=(const APInt& RHS) {
196   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
197   if (isSingleWord())
198     VAL += RHS.VAL;
199   else
200     tcAdd(pVal, RHS.pVal, 0, getNumWords());
201   return clearUnusedBits();
202 }
203 
204 APInt& APInt::operator+=(uint64_t RHS) {
205   if (isSingleWord())
206     VAL += RHS;
207   else
208     tcAddPart(pVal, RHS, getNumWords());
209   return clearUnusedBits();
210 }
211 
212 /// Subtracts the RHS APInt from this APInt
213 /// @returns this, after subtraction
214 /// @brief Subtraction assignment operator.
215 APInt& APInt::operator-=(const APInt& RHS) {
216   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
217   if (isSingleWord())
218     VAL -= RHS.VAL;
219   else
220     tcSubtract(pVal, RHS.pVal, 0, getNumWords());
221   return clearUnusedBits();
222 }
223 
224 APInt& APInt::operator-=(uint64_t RHS) {
225   if (isSingleWord())
226     VAL -= RHS;
227   else
228     tcSubtractPart(pVal, RHS, getNumWords());
229   return clearUnusedBits();
230 }
231 
232 /// Multiplies an integer array, x, by a uint64_t integer and places the result
233 /// into dest.
234 /// @returns the carry out of the multiplication.
235 /// @brief Multiply a multi-digit APInt by a single digit (64-bit) integer.
236 static uint64_t mul_1(uint64_t dest[], uint64_t x[], unsigned len, uint64_t y) {
237   // Split y into high 32-bit part (hy)  and low 32-bit part (ly)
238   uint64_t ly = y & 0xffffffffULL, hy = y >> 32;
239   uint64_t carry = 0;
240 
241   // For each digit of x.
242   for (unsigned i = 0; i < len; ++i) {
243     // Split x into high and low words
244     uint64_t lx = x[i] & 0xffffffffULL;
245     uint64_t hx = x[i] >> 32;
246     // hasCarry - A flag to indicate if there is a carry to the next digit.
247     // hasCarry == 0, no carry
248     // hasCarry == 1, has carry
249     // hasCarry == 2, no carry and the calculation result == 0.
250     uint8_t hasCarry = 0;
251     dest[i] = carry + lx * ly;
252     // Determine if the add above introduces carry.
253     hasCarry = (dest[i] < carry) ? 1 : 0;
254     carry = hx * ly + (dest[i] >> 32) + (hasCarry ? (1ULL << 32) : 0);
255     // The upper limit of carry can be (2^32 - 1)(2^32 - 1) +
256     // (2^32 - 1) + 2^32 = 2^64.
257     hasCarry = (!carry && hasCarry) ? 1 : (!carry ? 2 : 0);
258 
259     carry += (lx * hy) & 0xffffffffULL;
260     dest[i] = (carry << 32) | (dest[i] & 0xffffffffULL);
261     carry = (((!carry && hasCarry != 2) || hasCarry == 1) ? (1ULL << 32) : 0) +
262             (carry >> 32) + ((lx * hy) >> 32) + hx * hy;
263   }
264   return carry;
265 }
266 
267 /// Multiplies integer array x by integer array y and stores the result into
268 /// the integer array dest. Note that dest's size must be >= xlen + ylen.
269 /// @brief Generalized multiplication of integer arrays.
270 static void mul(uint64_t dest[], uint64_t x[], unsigned xlen, uint64_t y[],
271                 unsigned ylen) {
272   dest[xlen] = mul_1(dest, x, xlen, y[0]);
273   for (unsigned i = 1; i < ylen; ++i) {
274     uint64_t ly = y[i] & 0xffffffffULL, hy = y[i] >> 32;
275     uint64_t carry = 0, lx = 0, hx = 0;
276     for (unsigned j = 0; j < xlen; ++j) {
277       lx = x[j] & 0xffffffffULL;
278       hx = x[j] >> 32;
279       // hasCarry - A flag to indicate if has carry.
280       // hasCarry == 0, no carry
281       // hasCarry == 1, has carry
282       // hasCarry == 2, no carry and the calculation result == 0.
283       uint8_t hasCarry = 0;
284       uint64_t resul = carry + lx * ly;
285       hasCarry = (resul < carry) ? 1 : 0;
286       carry = (hasCarry ? (1ULL << 32) : 0) + hx * ly + (resul >> 32);
287       hasCarry = (!carry && hasCarry) ? 1 : (!carry ? 2 : 0);
288 
289       carry += (lx * hy) & 0xffffffffULL;
290       resul = (carry << 32) | (resul & 0xffffffffULL);
291       dest[i+j] += resul;
292       carry = (((!carry && hasCarry != 2) || hasCarry == 1) ? (1ULL << 32) : 0)+
293               (carry >> 32) + (dest[i+j] < resul ? 1 : 0) +
294               ((lx * hy) >> 32) + hx * hy;
295     }
296     dest[i+xlen] = carry;
297   }
298 }
299 
300 APInt& APInt::operator*=(const APInt& RHS) {
301   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
302   if (isSingleWord()) {
303     VAL *= RHS.VAL;
304     clearUnusedBits();
305     return *this;
306   }
307 
308   // Get some bit facts about LHS and check for zero
309   unsigned lhsBits = getActiveBits();
310   unsigned lhsWords = !lhsBits ? 0 : whichWord(lhsBits - 1) + 1;
311   if (!lhsWords)
312     // 0 * X ===> 0
313     return *this;
314 
315   // Get some bit facts about RHS and check for zero
316   unsigned rhsBits = RHS.getActiveBits();
317   unsigned rhsWords = !rhsBits ? 0 : whichWord(rhsBits - 1) + 1;
318   if (!rhsWords) {
319     // X * 0 ===> 0
320     clearAllBits();
321     return *this;
322   }
323 
324   // Allocate space for the result
325   unsigned destWords = rhsWords + lhsWords;
326   uint64_t *dest = getMemory(destWords);
327 
328   // Perform the long multiply
329   mul(dest, pVal, lhsWords, RHS.pVal, rhsWords);
330 
331   // Copy result back into *this
332   clearAllBits();
333   unsigned wordsToCopy = destWords >= getNumWords() ? getNumWords() : destWords;
334   memcpy(pVal, dest, wordsToCopy * APINT_WORD_SIZE);
335   clearUnusedBits();
336 
337   // delete dest array and return
338   delete[] dest;
339   return *this;
340 }
341 
342 void APInt::AndAssignSlowCase(const APInt& RHS) {
343   tcAnd(pVal, RHS.pVal, getNumWords());
344 }
345 
346 void APInt::OrAssignSlowCase(const APInt& RHS) {
347   tcOr(pVal, RHS.pVal, getNumWords());
348 }
349 
350 void APInt::XorAssignSlowCase(const APInt& RHS) {
351   tcXor(pVal, RHS.pVal, getNumWords());
352 }
353 
354 APInt APInt::operator*(const APInt& RHS) const {
355   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
356   if (isSingleWord())
357     return APInt(BitWidth, VAL * RHS.VAL);
358   APInt Result(*this);
359   Result *= RHS;
360   return Result;
361 }
362 
363 bool APInt::EqualSlowCase(const APInt& RHS) const {
364   return std::equal(pVal, pVal + getNumWords(), RHS.pVal);
365 }
366 
367 int APInt::compare(const APInt& RHS) const {
368   assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
369   if (isSingleWord())
370     return VAL < RHS.VAL ? -1 : VAL > RHS.VAL;
371 
372   return tcCompare(pVal, RHS.pVal, getNumWords());
373 }
374 
375 int APInt::compareSigned(const APInt& RHS) const {
376   assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
377   if (isSingleWord()) {
378     int64_t lhsSext = SignExtend64(VAL, BitWidth);
379     int64_t rhsSext = SignExtend64(RHS.VAL, BitWidth);
380     return lhsSext < rhsSext ? -1 : lhsSext > rhsSext;
381   }
382 
383   bool lhsNeg = isNegative();
384   bool rhsNeg = RHS.isNegative();
385 
386   // If the sign bits don't match, then (LHS < RHS) if LHS is negative
387   if (lhsNeg != rhsNeg)
388     return lhsNeg ? -1 : 1;
389 
390   // Otherwise we can just use an unsigned comparison, because even negative
391   // numbers compare correctly this way if both have the same signed-ness.
392   return tcCompare(pVal, RHS.pVal, getNumWords());
393 }
394 
395 void APInt::setBit(unsigned bitPosition) {
396   if (isSingleWord())
397     VAL |= maskBit(bitPosition);
398   else
399     pVal[whichWord(bitPosition)] |= maskBit(bitPosition);
400 }
401 
402 void APInt::setBitsSlowCase(unsigned loBit, unsigned hiBit) {
403   unsigned loWord = whichWord(loBit);
404   unsigned hiWord = whichWord(hiBit);
405 
406   // Create an initial mask for the low word with zeros below loBit.
407   uint64_t loMask = UINT64_MAX << whichBit(loBit);
408 
409   // If hiBit is not aligned, we need a high mask.
410   unsigned hiShiftAmt = whichBit(hiBit);
411   if (hiShiftAmt != 0) {
412     // Create a high mask with zeros above hiBit.
413     uint64_t hiMask = UINT64_MAX >> (APINT_BITS_PER_WORD - hiShiftAmt);
414     // If loWord and hiWord are equal, then we combine the masks. Otherwise,
415     // set the bits in hiWord.
416     if (hiWord == loWord)
417       loMask &= hiMask;
418     else
419       pVal[hiWord] |= hiMask;
420   }
421   // Apply the mask to the low word.
422   pVal[loWord] |= loMask;
423 
424   // Fill any words between loWord and hiWord with all ones.
425   for (unsigned word = loWord + 1; word < hiWord; ++word)
426     pVal[word] = UINT64_MAX;
427 }
428 
429 /// Set the given bit to 0 whose position is given as "bitPosition".
430 /// @brief Set a given bit to 0.
431 void APInt::clearBit(unsigned bitPosition) {
432   if (isSingleWord())
433     VAL &= ~maskBit(bitPosition);
434   else
435     pVal[whichWord(bitPosition)] &= ~maskBit(bitPosition);
436 }
437 
438 /// @brief Toggle every bit to its opposite value.
439 void APInt::flipAllBitsSlowCase() {
440   tcComplement(pVal, getNumWords());
441   clearUnusedBits();
442 }
443 
444 /// Toggle a given bit to its opposite value whose position is given
445 /// as "bitPosition".
446 /// @brief Toggles a given bit to its opposite value.
447 void APInt::flipBit(unsigned bitPosition) {
448   assert(bitPosition < BitWidth && "Out of the bit-width range!");
449   if ((*this)[bitPosition]) clearBit(bitPosition);
450   else setBit(bitPosition);
451 }
452 
453 void APInt::insertBits(const APInt &subBits, unsigned bitPosition) {
454   unsigned subBitWidth = subBits.getBitWidth();
455   assert(0 < subBitWidth && (subBitWidth + bitPosition) <= BitWidth &&
456          "Illegal bit insertion");
457 
458   // Insertion is a direct copy.
459   if (subBitWidth == BitWidth) {
460     *this = subBits;
461     return;
462   }
463 
464   // Single word result can be done as a direct bitmask.
465   if (isSingleWord()) {
466     uint64_t mask = UINT64_MAX >> (APINT_BITS_PER_WORD - subBitWidth);
467     VAL &= ~(mask << bitPosition);
468     VAL |= (subBits.VAL << bitPosition);
469     return;
470   }
471 
472   unsigned loBit = whichBit(bitPosition);
473   unsigned loWord = whichWord(bitPosition);
474   unsigned hi1Word = whichWord(bitPosition + subBitWidth - 1);
475 
476   // Insertion within a single word can be done as a direct bitmask.
477   if (loWord == hi1Word) {
478     uint64_t mask = UINT64_MAX >> (APINT_BITS_PER_WORD - subBitWidth);
479     pVal[loWord] &= ~(mask << loBit);
480     pVal[loWord] |= (subBits.VAL << loBit);
481     return;
482   }
483 
484   // Insert on word boundaries.
485   if (loBit == 0) {
486     // Direct copy whole words.
487     unsigned numWholeSubWords = subBitWidth / APINT_BITS_PER_WORD;
488     memcpy(pVal + loWord, subBits.getRawData(),
489            numWholeSubWords * APINT_WORD_SIZE);
490 
491     // Mask+insert remaining bits.
492     unsigned remainingBits = subBitWidth % APINT_BITS_PER_WORD;
493     if (remainingBits != 0) {
494       uint64_t mask = UINT64_MAX >> (APINT_BITS_PER_WORD - remainingBits);
495       pVal[hi1Word] &= ~mask;
496       pVal[hi1Word] |= subBits.getWord(subBitWidth - 1);
497     }
498     return;
499   }
500 
501   // General case - set/clear individual bits in dst based on src.
502   // TODO - there is scope for optimization here, but at the moment this code
503   // path is barely used so prefer readability over performance.
504   for (unsigned i = 0; i != subBitWidth; ++i) {
505     if (subBits[i])
506       setBit(bitPosition + i);
507     else
508       clearBit(bitPosition + i);
509   }
510 }
511 
512 APInt APInt::extractBits(unsigned numBits, unsigned bitPosition) const {
513   assert(numBits > 0 && "Can't extract zero bits");
514   assert(bitPosition < BitWidth && (numBits + bitPosition) <= BitWidth &&
515          "Illegal bit extraction");
516 
517   if (isSingleWord())
518     return APInt(numBits, VAL >> bitPosition);
519 
520   unsigned loBit = whichBit(bitPosition);
521   unsigned loWord = whichWord(bitPosition);
522   unsigned hiWord = whichWord(bitPosition + numBits - 1);
523 
524   // Single word result extracting bits from a single word source.
525   if (loWord == hiWord)
526     return APInt(numBits, pVal[loWord] >> loBit);
527 
528   // Extracting bits that start on a source word boundary can be done
529   // as a fast memory copy.
530   if (loBit == 0)
531     return APInt(numBits, makeArrayRef(pVal + loWord, 1 + hiWord - loWord));
532 
533   // General case - shift + copy source words directly into place.
534   APInt Result(numBits, 0);
535   unsigned NumSrcWords = getNumWords();
536   unsigned NumDstWords = Result.getNumWords();
537 
538   for (unsigned word = 0; word < NumDstWords; ++word) {
539     uint64_t w0 = pVal[loWord + word];
540     uint64_t w1 =
541         (loWord + word + 1) < NumSrcWords ? pVal[loWord + word + 1] : 0;
542     Result.pVal[word] = (w0 >> loBit) | (w1 << (APINT_BITS_PER_WORD - loBit));
543   }
544 
545   return Result.clearUnusedBits();
546 }
547 
548 unsigned APInt::getBitsNeeded(StringRef str, uint8_t radix) {
549   assert(!str.empty() && "Invalid string length");
550   assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 ||
551           radix == 36) &&
552          "Radix should be 2, 8, 10, 16, or 36!");
553 
554   size_t slen = str.size();
555 
556   // Each computation below needs to know if it's negative.
557   StringRef::iterator p = str.begin();
558   unsigned isNegative = *p == '-';
559   if (*p == '-' || *p == '+') {
560     p++;
561     slen--;
562     assert(slen && "String is only a sign, needs a value.");
563   }
564 
565   // For radixes of power-of-two values, the bits required is accurately and
566   // easily computed
567   if (radix == 2)
568     return slen + isNegative;
569   if (radix == 8)
570     return slen * 3 + isNegative;
571   if (radix == 16)
572     return slen * 4 + isNegative;
573 
574   // FIXME: base 36
575 
576   // This is grossly inefficient but accurate. We could probably do something
577   // with a computation of roughly slen*64/20 and then adjust by the value of
578   // the first few digits. But, I'm not sure how accurate that could be.
579 
580   // Compute a sufficient number of bits that is always large enough but might
581   // be too large. This avoids the assertion in the constructor. This
582   // calculation doesn't work appropriately for the numbers 0-9, so just use 4
583   // bits in that case.
584   unsigned sufficient
585     = radix == 10? (slen == 1 ? 4 : slen * 64/18)
586                  : (slen == 1 ? 7 : slen * 16/3);
587 
588   // Convert to the actual binary value.
589   APInt tmp(sufficient, StringRef(p, slen), radix);
590 
591   // Compute how many bits are required. If the log is infinite, assume we need
592   // just bit.
593   unsigned log = tmp.logBase2();
594   if (log == (unsigned)-1) {
595     return isNegative + 1;
596   } else {
597     return isNegative + log + 1;
598   }
599 }
600 
601 hash_code llvm::hash_value(const APInt &Arg) {
602   if (Arg.isSingleWord())
603     return hash_combine(Arg.VAL);
604 
605   return hash_combine_range(Arg.pVal, Arg.pVal + Arg.getNumWords());
606 }
607 
608 bool APInt::isSplat(unsigned SplatSizeInBits) const {
609   assert(getBitWidth() % SplatSizeInBits == 0 &&
610          "SplatSizeInBits must divide width!");
611   // We can check that all parts of an integer are equal by making use of a
612   // little trick: rotate and check if it's still the same value.
613   return *this == rotl(SplatSizeInBits);
614 }
615 
616 /// This function returns the high "numBits" bits of this APInt.
617 APInt APInt::getHiBits(unsigned numBits) const {
618   return this->lshr(BitWidth - numBits);
619 }
620 
621 /// This function returns the low "numBits" bits of this APInt.
622 APInt APInt::getLoBits(unsigned numBits) const {
623   APInt Result(getLowBitsSet(BitWidth, numBits));
624   Result &= *this;
625   return Result;
626 }
627 
628 unsigned APInt::countLeadingZerosSlowCase() const {
629   unsigned Count = 0;
630   for (int i = getNumWords()-1; i >= 0; --i) {
631     uint64_t V = pVal[i];
632     if (V == 0)
633       Count += APINT_BITS_PER_WORD;
634     else {
635       Count += llvm::countLeadingZeros(V);
636       break;
637     }
638   }
639   // Adjust for unused bits in the most significant word (they are zero).
640   unsigned Mod = BitWidth % APINT_BITS_PER_WORD;
641   Count -= Mod > 0 ? APINT_BITS_PER_WORD - Mod : 0;
642   return Count;
643 }
644 
645 unsigned APInt::countLeadingOnes() const {
646   if (isSingleWord())
647     return llvm::countLeadingOnes(VAL << (APINT_BITS_PER_WORD - BitWidth));
648 
649   unsigned highWordBits = BitWidth % APINT_BITS_PER_WORD;
650   unsigned shift;
651   if (!highWordBits) {
652     highWordBits = APINT_BITS_PER_WORD;
653     shift = 0;
654   } else {
655     shift = APINT_BITS_PER_WORD - highWordBits;
656   }
657   int i = getNumWords() - 1;
658   unsigned Count = llvm::countLeadingOnes(pVal[i] << shift);
659   if (Count == highWordBits) {
660     for (i--; i >= 0; --i) {
661       if (pVal[i] == -1ULL)
662         Count += APINT_BITS_PER_WORD;
663       else {
664         Count += llvm::countLeadingOnes(pVal[i]);
665         break;
666       }
667     }
668   }
669   return Count;
670 }
671 
672 unsigned APInt::countTrailingZeros() const {
673   if (isSingleWord())
674     return std::min(unsigned(llvm::countTrailingZeros(VAL)), BitWidth);
675   unsigned Count = 0;
676   unsigned i = 0;
677   for (; i < getNumWords() && pVal[i] == 0; ++i)
678     Count += APINT_BITS_PER_WORD;
679   if (i < getNumWords())
680     Count += llvm::countTrailingZeros(pVal[i]);
681   return std::min(Count, BitWidth);
682 }
683 
684 unsigned APInt::countTrailingOnesSlowCase() const {
685   unsigned Count = 0;
686   unsigned i = 0;
687   for (; i < getNumWords() && pVal[i] == -1ULL; ++i)
688     Count += APINT_BITS_PER_WORD;
689   if (i < getNumWords())
690     Count += llvm::countTrailingOnes(pVal[i]);
691   return std::min(Count, BitWidth);
692 }
693 
694 unsigned APInt::countPopulationSlowCase() const {
695   unsigned Count = 0;
696   for (unsigned i = 0; i < getNumWords(); ++i)
697     Count += llvm::countPopulation(pVal[i]);
698   return Count;
699 }
700 
701 bool APInt::intersectsSlowCase(const APInt &RHS) const {
702   for (unsigned i = 0, e = getNumWords(); i != e; ++i)
703     if ((pVal[i] & RHS.pVal[i]) != 0)
704       return true;
705 
706   return false;
707 }
708 
709 bool APInt::isSubsetOfSlowCase(const APInt &RHS) const {
710   for (unsigned i = 0, e = getNumWords(); i != e; ++i)
711     if ((pVal[i] & ~RHS.pVal[i]) != 0)
712       return false;
713 
714   return true;
715 }
716 
717 APInt APInt::byteSwap() const {
718   assert(BitWidth >= 16 && BitWidth % 16 == 0 && "Cannot byteswap!");
719   if (BitWidth == 16)
720     return APInt(BitWidth, ByteSwap_16(uint16_t(VAL)));
721   if (BitWidth == 32)
722     return APInt(BitWidth, ByteSwap_32(unsigned(VAL)));
723   if (BitWidth == 48) {
724     unsigned Tmp1 = unsigned(VAL >> 16);
725     Tmp1 = ByteSwap_32(Tmp1);
726     uint16_t Tmp2 = uint16_t(VAL);
727     Tmp2 = ByteSwap_16(Tmp2);
728     return APInt(BitWidth, (uint64_t(Tmp2) << 32) | Tmp1);
729   }
730   if (BitWidth == 64)
731     return APInt(BitWidth, ByteSwap_64(VAL));
732 
733   APInt Result(getNumWords() * APINT_BITS_PER_WORD, 0);
734   for (unsigned I = 0, N = getNumWords(); I != N; ++I)
735     Result.pVal[I] = ByteSwap_64(pVal[N - I - 1]);
736   if (Result.BitWidth != BitWidth) {
737     Result.lshrInPlace(Result.BitWidth - BitWidth);
738     Result.BitWidth = BitWidth;
739   }
740   return Result;
741 }
742 
743 APInt APInt::reverseBits() const {
744   switch (BitWidth) {
745   case 64:
746     return APInt(BitWidth, llvm::reverseBits<uint64_t>(VAL));
747   case 32:
748     return APInt(BitWidth, llvm::reverseBits<uint32_t>(VAL));
749   case 16:
750     return APInt(BitWidth, llvm::reverseBits<uint16_t>(VAL));
751   case 8:
752     return APInt(BitWidth, llvm::reverseBits<uint8_t>(VAL));
753   default:
754     break;
755   }
756 
757   APInt Val(*this);
758   APInt Reversed(BitWidth, 0);
759   unsigned S = BitWidth;
760 
761   for (; Val != 0; Val.lshrInPlace(1)) {
762     Reversed <<= 1;
763     Reversed |= Val[0];
764     --S;
765   }
766 
767   Reversed <<= S;
768   return Reversed;
769 }
770 
771 APInt llvm::APIntOps::GreatestCommonDivisor(APInt A, APInt B) {
772   // Fast-path a common case.
773   if (A == B) return A;
774 
775   // Corner cases: if either operand is zero, the other is the gcd.
776   if (!A) return B;
777   if (!B) return A;
778 
779   // Count common powers of 2 and remove all other powers of 2.
780   unsigned Pow2;
781   {
782     unsigned Pow2_A = A.countTrailingZeros();
783     unsigned Pow2_B = B.countTrailingZeros();
784     if (Pow2_A > Pow2_B) {
785       A.lshrInPlace(Pow2_A - Pow2_B);
786       Pow2 = Pow2_B;
787     } else if (Pow2_B > Pow2_A) {
788       B.lshrInPlace(Pow2_B - Pow2_A);
789       Pow2 = Pow2_A;
790     } else {
791       Pow2 = Pow2_A;
792     }
793   }
794 
795   // Both operands are odd multiples of 2^Pow_2:
796   //
797   //   gcd(a, b) = gcd(|a - b| / 2^i, min(a, b))
798   //
799   // This is a modified version of Stein's algorithm, taking advantage of
800   // efficient countTrailingZeros().
801   while (A != B) {
802     if (A.ugt(B)) {
803       A -= B;
804       A.lshrInPlace(A.countTrailingZeros() - Pow2);
805     } else {
806       B -= A;
807       B.lshrInPlace(B.countTrailingZeros() - Pow2);
808     }
809   }
810 
811   return A;
812 }
813 
814 APInt llvm::APIntOps::RoundDoubleToAPInt(double Double, unsigned width) {
815   union {
816     double D;
817     uint64_t I;
818   } T;
819   T.D = Double;
820 
821   // Get the sign bit from the highest order bit
822   bool isNeg = T.I >> 63;
823 
824   // Get the 11-bit exponent and adjust for the 1023 bit bias
825   int64_t exp = ((T.I >> 52) & 0x7ff) - 1023;
826 
827   // If the exponent is negative, the value is < 0 so just return 0.
828   if (exp < 0)
829     return APInt(width, 0u);
830 
831   // Extract the mantissa by clearing the top 12 bits (sign + exponent).
832   uint64_t mantissa = (T.I & (~0ULL >> 12)) | 1ULL << 52;
833 
834   // If the exponent doesn't shift all bits out of the mantissa
835   if (exp < 52)
836     return isNeg ? -APInt(width, mantissa >> (52 - exp)) :
837                     APInt(width, mantissa >> (52 - exp));
838 
839   // If the client didn't provide enough bits for us to shift the mantissa into
840   // then the result is undefined, just return 0
841   if (width <= exp - 52)
842     return APInt(width, 0);
843 
844   // Otherwise, we have to shift the mantissa bits up to the right location
845   APInt Tmp(width, mantissa);
846   Tmp = Tmp.shl((unsigned)exp - 52);
847   return isNeg ? -Tmp : Tmp;
848 }
849 
850 /// This function converts this APInt to a double.
851 /// The layout for double is as following (IEEE Standard 754):
852 ///  --------------------------------------
853 /// |  Sign    Exponent    Fraction    Bias |
854 /// |-------------------------------------- |
855 /// |  1[63]   11[62-52]   52[51-00]   1023 |
856 ///  --------------------------------------
857 double APInt::roundToDouble(bool isSigned) const {
858 
859   // Handle the simple case where the value is contained in one uint64_t.
860   // It is wrong to optimize getWord(0) to VAL; there might be more than one word.
861   if (isSingleWord() || getActiveBits() <= APINT_BITS_PER_WORD) {
862     if (isSigned) {
863       int64_t sext = SignExtend64(getWord(0), BitWidth);
864       return double(sext);
865     } else
866       return double(getWord(0));
867   }
868 
869   // Determine if the value is negative.
870   bool isNeg = isSigned ? (*this)[BitWidth-1] : false;
871 
872   // Construct the absolute value if we're negative.
873   APInt Tmp(isNeg ? -(*this) : (*this));
874 
875   // Figure out how many bits we're using.
876   unsigned n = Tmp.getActiveBits();
877 
878   // The exponent (without bias normalization) is just the number of bits
879   // we are using. Note that the sign bit is gone since we constructed the
880   // absolute value.
881   uint64_t exp = n;
882 
883   // Return infinity for exponent overflow
884   if (exp > 1023) {
885     if (!isSigned || !isNeg)
886       return std::numeric_limits<double>::infinity();
887     else
888       return -std::numeric_limits<double>::infinity();
889   }
890   exp += 1023; // Increment for 1023 bias
891 
892   // Number of bits in mantissa is 52. To obtain the mantissa value, we must
893   // extract the high 52 bits from the correct words in pVal.
894   uint64_t mantissa;
895   unsigned hiWord = whichWord(n-1);
896   if (hiWord == 0) {
897     mantissa = Tmp.pVal[0];
898     if (n > 52)
899       mantissa >>= n - 52; // shift down, we want the top 52 bits.
900   } else {
901     assert(hiWord > 0 && "huh?");
902     uint64_t hibits = Tmp.pVal[hiWord] << (52 - n % APINT_BITS_PER_WORD);
903     uint64_t lobits = Tmp.pVal[hiWord-1] >> (11 + n % APINT_BITS_PER_WORD);
904     mantissa = hibits | lobits;
905   }
906 
907   // The leading bit of mantissa is implicit, so get rid of it.
908   uint64_t sign = isNeg ? (1ULL << (APINT_BITS_PER_WORD - 1)) : 0;
909   union {
910     double D;
911     uint64_t I;
912   } T;
913   T.I = sign | (exp << 52) | mantissa;
914   return T.D;
915 }
916 
917 // Truncate to new width.
918 APInt APInt::trunc(unsigned width) const {
919   assert(width < BitWidth && "Invalid APInt Truncate request");
920   assert(width && "Can't truncate to 0 bits");
921 
922   if (width <= APINT_BITS_PER_WORD)
923     return APInt(width, getRawData()[0]);
924 
925   APInt Result(getMemory(getNumWords(width)), width);
926 
927   // Copy full words.
928   unsigned i;
929   for (i = 0; i != width / APINT_BITS_PER_WORD; i++)
930     Result.pVal[i] = pVal[i];
931 
932   // Truncate and copy any partial word.
933   unsigned bits = (0 - width) % APINT_BITS_PER_WORD;
934   if (bits != 0)
935     Result.pVal[i] = pVal[i] << bits >> bits;
936 
937   return Result;
938 }
939 
940 // Sign extend to a new width.
941 APInt APInt::sext(unsigned width) const {
942   assert(width > BitWidth && "Invalid APInt SignExtend request");
943 
944   if (width <= APINT_BITS_PER_WORD) {
945     uint64_t val = VAL << (APINT_BITS_PER_WORD - BitWidth);
946     val = (int64_t)val >> (width - BitWidth);
947     return APInt(width, val >> (APINT_BITS_PER_WORD - width));
948   }
949 
950   APInt Result(getMemory(getNumWords(width)), width);
951 
952   // Copy full words.
953   unsigned i;
954   uint64_t word = 0;
955   for (i = 0; i != BitWidth / APINT_BITS_PER_WORD; i++) {
956     word = getRawData()[i];
957     Result.pVal[i] = word;
958   }
959 
960   // Read and sign-extend any partial word.
961   unsigned bits = (0 - BitWidth) % APINT_BITS_PER_WORD;
962   if (bits != 0)
963     word = (int64_t)getRawData()[i] << bits >> bits;
964   else
965     word = (int64_t)word >> (APINT_BITS_PER_WORD - 1);
966 
967   // Write remaining full words.
968   for (; i != width / APINT_BITS_PER_WORD; i++) {
969     Result.pVal[i] = word;
970     word = (int64_t)word >> (APINT_BITS_PER_WORD - 1);
971   }
972 
973   // Write any partial word.
974   bits = (0 - width) % APINT_BITS_PER_WORD;
975   if (bits != 0)
976     Result.pVal[i] = word << bits >> bits;
977 
978   return Result;
979 }
980 
981 //  Zero extend to a new width.
982 APInt APInt::zext(unsigned width) const {
983   assert(width > BitWidth && "Invalid APInt ZeroExtend request");
984 
985   if (width <= APINT_BITS_PER_WORD)
986     return APInt(width, VAL);
987 
988   APInt Result(getMemory(getNumWords(width)), width);
989 
990   // Copy words.
991   unsigned i;
992   for (i = 0; i != getNumWords(); i++)
993     Result.pVal[i] = getRawData()[i];
994 
995   // Zero remaining words.
996   memset(&Result.pVal[i], 0, (Result.getNumWords() - i) * APINT_WORD_SIZE);
997 
998   return Result;
999 }
1000 
1001 APInt APInt::zextOrTrunc(unsigned width) const {
1002   if (BitWidth < width)
1003     return zext(width);
1004   if (BitWidth > width)
1005     return trunc(width);
1006   return *this;
1007 }
1008 
1009 APInt APInt::sextOrTrunc(unsigned width) const {
1010   if (BitWidth < width)
1011     return sext(width);
1012   if (BitWidth > width)
1013     return trunc(width);
1014   return *this;
1015 }
1016 
1017 APInt APInt::zextOrSelf(unsigned width) const {
1018   if (BitWidth < width)
1019     return zext(width);
1020   return *this;
1021 }
1022 
1023 APInt APInt::sextOrSelf(unsigned width) const {
1024   if (BitWidth < width)
1025     return sext(width);
1026   return *this;
1027 }
1028 
1029 /// Arithmetic right-shift this APInt by shiftAmt.
1030 /// @brief Arithmetic right-shift function.
1031 APInt APInt::ashr(const APInt &shiftAmt) const {
1032   return ashr((unsigned)shiftAmt.getLimitedValue(BitWidth));
1033 }
1034 
1035 /// Arithmetic right-shift this APInt by shiftAmt.
1036 /// @brief Arithmetic right-shift function.
1037 APInt APInt::ashr(unsigned shiftAmt) const {
1038   assert(shiftAmt <= BitWidth && "Invalid shift amount");
1039   // Handle a degenerate case
1040   if (shiftAmt == 0)
1041     return *this;
1042 
1043   // Handle single word shifts with built-in ashr
1044   if (isSingleWord()) {
1045     if (shiftAmt == BitWidth)
1046       return APInt(BitWidth, 0); // undefined
1047     return APInt(BitWidth, SignExtend64(VAL, BitWidth) >> shiftAmt);
1048   }
1049 
1050   // If all the bits were shifted out, the result is, technically, undefined.
1051   // We return -1 if it was negative, 0 otherwise. We check this early to avoid
1052   // issues in the algorithm below.
1053   if (shiftAmt == BitWidth) {
1054     if (isNegative())
1055       return APInt(BitWidth, -1ULL, true);
1056     else
1057       return APInt(BitWidth, 0);
1058   }
1059 
1060   // Create some space for the result.
1061   uint64_t * val = new uint64_t[getNumWords()];
1062 
1063   // Compute some values needed by the following shift algorithms
1064   unsigned wordShift = shiftAmt % APINT_BITS_PER_WORD; // bits to shift per word
1065   unsigned offset = shiftAmt / APINT_BITS_PER_WORD; // word offset for shift
1066   unsigned breakWord = getNumWords() - 1 - offset; // last word affected
1067   unsigned bitsInWord = whichBit(BitWidth); // how many bits in last word?
1068   if (bitsInWord == 0)
1069     bitsInWord = APINT_BITS_PER_WORD;
1070 
1071   // If we are shifting whole words, just move whole words
1072   if (wordShift == 0) {
1073     // Move the words containing significant bits
1074     for (unsigned i = 0; i <= breakWord; ++i)
1075       val[i] = pVal[i+offset]; // move whole word
1076 
1077     // Adjust the top significant word for sign bit fill, if negative
1078     if (isNegative())
1079       if (bitsInWord < APINT_BITS_PER_WORD)
1080         val[breakWord] |= ~0ULL << bitsInWord; // set high bits
1081   } else {
1082     // Shift the low order words
1083     for (unsigned i = 0; i < breakWord; ++i) {
1084       // This combines the shifted corresponding word with the low bits from
1085       // the next word (shifted into this word's high bits).
1086       val[i] = (pVal[i+offset] >> wordShift) |
1087                (pVal[i+offset+1] << (APINT_BITS_PER_WORD - wordShift));
1088     }
1089 
1090     // Shift the break word. In this case there are no bits from the next word
1091     // to include in this word.
1092     val[breakWord] = pVal[breakWord+offset] >> wordShift;
1093 
1094     // Deal with sign extension in the break word, and possibly the word before
1095     // it.
1096     if (isNegative()) {
1097       if (wordShift > bitsInWord) {
1098         if (breakWord > 0)
1099           val[breakWord-1] |=
1100             ~0ULL << (APINT_BITS_PER_WORD - (wordShift - bitsInWord));
1101         val[breakWord] |= ~0ULL;
1102       } else
1103         val[breakWord] |= (~0ULL << (bitsInWord - wordShift));
1104     }
1105   }
1106 
1107   // Remaining words are 0 or -1, just assign them.
1108   uint64_t fillValue = (isNegative() ? -1ULL : 0);
1109   for (unsigned i = breakWord+1; i < getNumWords(); ++i)
1110     val[i] = fillValue;
1111   APInt Result(val, BitWidth);
1112   Result.clearUnusedBits();
1113   return Result;
1114 }
1115 
1116 /// Logical right-shift this APInt by shiftAmt.
1117 /// @brief Logical right-shift function.
1118 void APInt::lshrInPlace(const APInt &shiftAmt) {
1119   lshrInPlace((unsigned)shiftAmt.getLimitedValue(BitWidth));
1120 }
1121 
1122 /// Logical right-shift this APInt by shiftAmt.
1123 /// @brief Logical right-shift function.
1124 void APInt::lshrSlowCase(unsigned ShiftAmt) {
1125   tcShiftRight(pVal, getNumWords(), ShiftAmt);
1126 }
1127 
1128 /// Left-shift this APInt by shiftAmt.
1129 /// @brief Left-shift function.
1130 APInt APInt::shl(const APInt &shiftAmt) const {
1131   // It's undefined behavior in C to shift by BitWidth or greater.
1132   return shl((unsigned)shiftAmt.getLimitedValue(BitWidth));
1133 }
1134 
1135 void APInt::shlSlowCase(unsigned ShiftAmt) {
1136   tcShiftLeft(pVal, getNumWords(), ShiftAmt);
1137   clearUnusedBits();
1138 }
1139 
1140 // Calculate the rotate amount modulo the bit width.
1141 static unsigned rotateModulo(unsigned BitWidth, const APInt &rotateAmt) {
1142   unsigned rotBitWidth = rotateAmt.getBitWidth();
1143   APInt rot = rotateAmt;
1144   if (rotBitWidth < BitWidth) {
1145     // Extend the rotate APInt, so that the urem doesn't divide by 0.
1146     // e.g. APInt(1, 32) would give APInt(1, 0).
1147     rot = rotateAmt.zext(BitWidth);
1148   }
1149   rot = rot.urem(APInt(rot.getBitWidth(), BitWidth));
1150   return rot.getLimitedValue(BitWidth);
1151 }
1152 
1153 APInt APInt::rotl(const APInt &rotateAmt) const {
1154   return rotl(rotateModulo(BitWidth, rotateAmt));
1155 }
1156 
1157 APInt APInt::rotl(unsigned rotateAmt) const {
1158   rotateAmt %= BitWidth;
1159   if (rotateAmt == 0)
1160     return *this;
1161   return shl(rotateAmt) | lshr(BitWidth - rotateAmt);
1162 }
1163 
1164 APInt APInt::rotr(const APInt &rotateAmt) const {
1165   return rotr(rotateModulo(BitWidth, rotateAmt));
1166 }
1167 
1168 APInt APInt::rotr(unsigned rotateAmt) const {
1169   rotateAmt %= BitWidth;
1170   if (rotateAmt == 0)
1171     return *this;
1172   return lshr(rotateAmt) | shl(BitWidth - rotateAmt);
1173 }
1174 
1175 // Square Root - this method computes and returns the square root of "this".
1176 // Three mechanisms are used for computation. For small values (<= 5 bits),
1177 // a table lookup is done. This gets some performance for common cases. For
1178 // values using less than 52 bits, the value is converted to double and then
1179 // the libc sqrt function is called. The result is rounded and then converted
1180 // back to a uint64_t which is then used to construct the result. Finally,
1181 // the Babylonian method for computing square roots is used.
1182 APInt APInt::sqrt() const {
1183 
1184   // Determine the magnitude of the value.
1185   unsigned magnitude = getActiveBits();
1186 
1187   // Use a fast table for some small values. This also gets rid of some
1188   // rounding errors in libc sqrt for small values.
1189   if (magnitude <= 5) {
1190     static const uint8_t results[32] = {
1191       /*     0 */ 0,
1192       /*  1- 2 */ 1, 1,
1193       /*  3- 6 */ 2, 2, 2, 2,
1194       /*  7-12 */ 3, 3, 3, 3, 3, 3,
1195       /* 13-20 */ 4, 4, 4, 4, 4, 4, 4, 4,
1196       /* 21-30 */ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1197       /*    31 */ 6
1198     };
1199     return APInt(BitWidth, results[ (isSingleWord() ? VAL : pVal[0]) ]);
1200   }
1201 
1202   // If the magnitude of the value fits in less than 52 bits (the precision of
1203   // an IEEE double precision floating point value), then we can use the
1204   // libc sqrt function which will probably use a hardware sqrt computation.
1205   // This should be faster than the algorithm below.
1206   if (magnitude < 52) {
1207     return APInt(BitWidth,
1208                  uint64_t(::round(::sqrt(double(isSingleWord()?VAL:pVal[0])))));
1209   }
1210 
1211   // Okay, all the short cuts are exhausted. We must compute it. The following
1212   // is a classical Babylonian method for computing the square root. This code
1213   // was adapted to APInt from a wikipedia article on such computations.
1214   // See http://www.wikipedia.org/ and go to the page named
1215   // Calculate_an_integer_square_root.
1216   unsigned nbits = BitWidth, i = 4;
1217   APInt testy(BitWidth, 16);
1218   APInt x_old(BitWidth, 1);
1219   APInt x_new(BitWidth, 0);
1220   APInt two(BitWidth, 2);
1221 
1222   // Select a good starting value using binary logarithms.
1223   for (;; i += 2, testy = testy.shl(2))
1224     if (i >= nbits || this->ule(testy)) {
1225       x_old = x_old.shl(i / 2);
1226       break;
1227     }
1228 
1229   // Use the Babylonian method to arrive at the integer square root:
1230   for (;;) {
1231     x_new = (this->udiv(x_old) + x_old).udiv(two);
1232     if (x_old.ule(x_new))
1233       break;
1234     x_old = x_new;
1235   }
1236 
1237   // Make sure we return the closest approximation
1238   // NOTE: The rounding calculation below is correct. It will produce an
1239   // off-by-one discrepancy with results from pari/gp. That discrepancy has been
1240   // determined to be a rounding issue with pari/gp as it begins to use a
1241   // floating point representation after 192 bits. There are no discrepancies
1242   // between this algorithm and pari/gp for bit widths < 192 bits.
1243   APInt square(x_old * x_old);
1244   APInt nextSquare((x_old + 1) * (x_old +1));
1245   if (this->ult(square))
1246     return x_old;
1247   assert(this->ule(nextSquare) && "Error in APInt::sqrt computation");
1248   APInt midpoint((nextSquare - square).udiv(two));
1249   APInt offset(*this - square);
1250   if (offset.ult(midpoint))
1251     return x_old;
1252   return x_old + 1;
1253 }
1254 
1255 /// Computes the multiplicative inverse of this APInt for a given modulo. The
1256 /// iterative extended Euclidean algorithm is used to solve for this value,
1257 /// however we simplify it to speed up calculating only the inverse, and take
1258 /// advantage of div+rem calculations. We also use some tricks to avoid copying
1259 /// (potentially large) APInts around.
1260 APInt APInt::multiplicativeInverse(const APInt& modulo) const {
1261   assert(ult(modulo) && "This APInt must be smaller than the modulo");
1262 
1263   // Using the properties listed at the following web page (accessed 06/21/08):
1264   //   http://www.numbertheory.org/php/euclid.html
1265   // (especially the properties numbered 3, 4 and 9) it can be proved that
1266   // BitWidth bits suffice for all the computations in the algorithm implemented
1267   // below. More precisely, this number of bits suffice if the multiplicative
1268   // inverse exists, but may not suffice for the general extended Euclidean
1269   // algorithm.
1270 
1271   APInt r[2] = { modulo, *this };
1272   APInt t[2] = { APInt(BitWidth, 0), APInt(BitWidth, 1) };
1273   APInt q(BitWidth, 0);
1274 
1275   unsigned i;
1276   for (i = 0; r[i^1] != 0; i ^= 1) {
1277     // An overview of the math without the confusing bit-flipping:
1278     // q = r[i-2] / r[i-1]
1279     // r[i] = r[i-2] % r[i-1]
1280     // t[i] = t[i-2] - t[i-1] * q
1281     udivrem(r[i], r[i^1], q, r[i]);
1282     t[i] -= t[i^1] * q;
1283   }
1284 
1285   // If this APInt and the modulo are not coprime, there is no multiplicative
1286   // inverse, so return 0. We check this by looking at the next-to-last
1287   // remainder, which is the gcd(*this,modulo) as calculated by the Euclidean
1288   // algorithm.
1289   if (r[i] != 1)
1290     return APInt(BitWidth, 0);
1291 
1292   // The next-to-last t is the multiplicative inverse.  However, we are
1293   // interested in a positive inverse. Calcuate a positive one from a negative
1294   // one if necessary. A simple addition of the modulo suffices because
1295   // abs(t[i]) is known to be less than *this/2 (see the link above).
1296   return t[i].isNegative() ? t[i] + modulo : t[i];
1297 }
1298 
1299 /// Calculate the magic numbers required to implement a signed integer division
1300 /// by a constant as a sequence of multiplies, adds and shifts.  Requires that
1301 /// the divisor not be 0, 1, or -1.  Taken from "Hacker's Delight", Henry S.
1302 /// Warren, Jr., chapter 10.
1303 APInt::ms APInt::magic() const {
1304   const APInt& d = *this;
1305   unsigned p;
1306   APInt ad, anc, delta, q1, r1, q2, r2, t;
1307   APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
1308   struct ms mag;
1309 
1310   ad = d.abs();
1311   t = signedMin + (d.lshr(d.getBitWidth() - 1));
1312   anc = t - 1 - t.urem(ad);   // absolute value of nc
1313   p = d.getBitWidth() - 1;    // initialize p
1314   q1 = signedMin.udiv(anc);   // initialize q1 = 2p/abs(nc)
1315   r1 = signedMin - q1*anc;    // initialize r1 = rem(2p,abs(nc))
1316   q2 = signedMin.udiv(ad);    // initialize q2 = 2p/abs(d)
1317   r2 = signedMin - q2*ad;     // initialize r2 = rem(2p,abs(d))
1318   do {
1319     p = p + 1;
1320     q1 = q1<<1;          // update q1 = 2p/abs(nc)
1321     r1 = r1<<1;          // update r1 = rem(2p/abs(nc))
1322     if (r1.uge(anc)) {  // must be unsigned comparison
1323       q1 = q1 + 1;
1324       r1 = r1 - anc;
1325     }
1326     q2 = q2<<1;          // update q2 = 2p/abs(d)
1327     r2 = r2<<1;          // update r2 = rem(2p/abs(d))
1328     if (r2.uge(ad)) {   // must be unsigned comparison
1329       q2 = q2 + 1;
1330       r2 = r2 - ad;
1331     }
1332     delta = ad - r2;
1333   } while (q1.ult(delta) || (q1 == delta && r1 == 0));
1334 
1335   mag.m = q2 + 1;
1336   if (d.isNegative()) mag.m = -mag.m;   // resulting magic number
1337   mag.s = p - d.getBitWidth();          // resulting shift
1338   return mag;
1339 }
1340 
1341 /// Calculate the magic numbers required to implement an unsigned integer
1342 /// division by a constant as a sequence of multiplies, adds and shifts.
1343 /// Requires that the divisor not be 0.  Taken from "Hacker's Delight", Henry
1344 /// S. Warren, Jr., chapter 10.
1345 /// LeadingZeros can be used to simplify the calculation if the upper bits
1346 /// of the divided value are known zero.
1347 APInt::mu APInt::magicu(unsigned LeadingZeros) const {
1348   const APInt& d = *this;
1349   unsigned p;
1350   APInt nc, delta, q1, r1, q2, r2;
1351   struct mu magu;
1352   magu.a = 0;               // initialize "add" indicator
1353   APInt allOnes = APInt::getAllOnesValue(d.getBitWidth()).lshr(LeadingZeros);
1354   APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
1355   APInt signedMax = APInt::getSignedMaxValue(d.getBitWidth());
1356 
1357   nc = allOnes - (allOnes - d).urem(d);
1358   p = d.getBitWidth() - 1;  // initialize p
1359   q1 = signedMin.udiv(nc);  // initialize q1 = 2p/nc
1360   r1 = signedMin - q1*nc;   // initialize r1 = rem(2p,nc)
1361   q2 = signedMax.udiv(d);   // initialize q2 = (2p-1)/d
1362   r2 = signedMax - q2*d;    // initialize r2 = rem((2p-1),d)
1363   do {
1364     p = p + 1;
1365     if (r1.uge(nc - r1)) {
1366       q1 = q1 + q1 + 1;  // update q1
1367       r1 = r1 + r1 - nc; // update r1
1368     }
1369     else {
1370       q1 = q1+q1; // update q1
1371       r1 = r1+r1; // update r1
1372     }
1373     if ((r2 + 1).uge(d - r2)) {
1374       if (q2.uge(signedMax)) magu.a = 1;
1375       q2 = q2+q2 + 1;     // update q2
1376       r2 = r2+r2 + 1 - d; // update r2
1377     }
1378     else {
1379       if (q2.uge(signedMin)) magu.a = 1;
1380       q2 = q2+q2;     // update q2
1381       r2 = r2+r2 + 1; // update r2
1382     }
1383     delta = d - 1 - r2;
1384   } while (p < d.getBitWidth()*2 &&
1385            (q1.ult(delta) || (q1 == delta && r1 == 0)));
1386   magu.m = q2 + 1; // resulting magic number
1387   magu.s = p - d.getBitWidth();  // resulting shift
1388   return magu;
1389 }
1390 
1391 /// Implementation of Knuth's Algorithm D (Division of nonnegative integers)
1392 /// from "Art of Computer Programming, Volume 2", section 4.3.1, p. 272. The
1393 /// variables here have the same names as in the algorithm. Comments explain
1394 /// the algorithm and any deviation from it.
1395 static void KnuthDiv(unsigned *u, unsigned *v, unsigned *q, unsigned* r,
1396                      unsigned m, unsigned n) {
1397   assert(u && "Must provide dividend");
1398   assert(v && "Must provide divisor");
1399   assert(q && "Must provide quotient");
1400   assert(u != v && u != q && v != q && "Must use different memory");
1401   assert(n>1 && "n must be > 1");
1402 
1403   // b denotes the base of the number system. In our case b is 2^32.
1404   const uint64_t b = uint64_t(1) << 32;
1405 
1406   DEBUG(dbgs() << "KnuthDiv: m=" << m << " n=" << n << '\n');
1407   DEBUG(dbgs() << "KnuthDiv: original:");
1408   DEBUG(for (int i = m+n; i >=0; i--) dbgs() << " " << u[i]);
1409   DEBUG(dbgs() << " by");
1410   DEBUG(for (int i = n; i >0; i--) dbgs() << " " << v[i-1]);
1411   DEBUG(dbgs() << '\n');
1412   // D1. [Normalize.] Set d = b / (v[n-1] + 1) and multiply all the digits of
1413   // u and v by d. Note that we have taken Knuth's advice here to use a power
1414   // of 2 value for d such that d * v[n-1] >= b/2 (b is the base). A power of
1415   // 2 allows us to shift instead of multiply and it is easy to determine the
1416   // shift amount from the leading zeros.  We are basically normalizing the u
1417   // and v so that its high bits are shifted to the top of v's range without
1418   // overflow. Note that this can require an extra word in u so that u must
1419   // be of length m+n+1.
1420   unsigned shift = countLeadingZeros(v[n-1]);
1421   unsigned v_carry = 0;
1422   unsigned u_carry = 0;
1423   if (shift) {
1424     for (unsigned i = 0; i < m+n; ++i) {
1425       unsigned u_tmp = u[i] >> (32 - shift);
1426       u[i] = (u[i] << shift) | u_carry;
1427       u_carry = u_tmp;
1428     }
1429     for (unsigned i = 0; i < n; ++i) {
1430       unsigned v_tmp = v[i] >> (32 - shift);
1431       v[i] = (v[i] << shift) | v_carry;
1432       v_carry = v_tmp;
1433     }
1434   }
1435   u[m+n] = u_carry;
1436 
1437   DEBUG(dbgs() << "KnuthDiv:   normal:");
1438   DEBUG(for (int i = m+n; i >=0; i--) dbgs() << " " << u[i]);
1439   DEBUG(dbgs() << " by");
1440   DEBUG(for (int i = n; i >0; i--) dbgs() << " " << v[i-1]);
1441   DEBUG(dbgs() << '\n');
1442 
1443   // D2. [Initialize j.]  Set j to m. This is the loop counter over the places.
1444   int j = m;
1445   do {
1446     DEBUG(dbgs() << "KnuthDiv: quotient digit #" << j << '\n');
1447     // D3. [Calculate q'.].
1448     //     Set qp = (u[j+n]*b + u[j+n-1]) / v[n-1]. (qp=qprime=q')
1449     //     Set rp = (u[j+n]*b + u[j+n-1]) % v[n-1]. (rp=rprime=r')
1450     // Now test if qp == b or qp*v[n-2] > b*rp + u[j+n-2]; if so, decrease
1451     // qp by 1, inrease rp by v[n-1], and repeat this test if rp < b. The test
1452     // on v[n-2] determines at high speed most of the cases in which the trial
1453     // value qp is one too large, and it eliminates all cases where qp is two
1454     // too large.
1455     uint64_t dividend = ((uint64_t(u[j+n]) << 32) + u[j+n-1]);
1456     DEBUG(dbgs() << "KnuthDiv: dividend == " << dividend << '\n');
1457     uint64_t qp = dividend / v[n-1];
1458     uint64_t rp = dividend % v[n-1];
1459     if (qp == b || qp*v[n-2] > b*rp + u[j+n-2]) {
1460       qp--;
1461       rp += v[n-1];
1462       if (rp < b && (qp == b || qp*v[n-2] > b*rp + u[j+n-2]))
1463         qp--;
1464     }
1465     DEBUG(dbgs() << "KnuthDiv: qp == " << qp << ", rp == " << rp << '\n');
1466 
1467     // D4. [Multiply and subtract.] Replace (u[j+n]u[j+n-1]...u[j]) with
1468     // (u[j+n]u[j+n-1]..u[j]) - qp * (v[n-1]...v[1]v[0]). This computation
1469     // consists of a simple multiplication by a one-place number, combined with
1470     // a subtraction.
1471     // The digits (u[j+n]...u[j]) should be kept positive; if the result of
1472     // this step is actually negative, (u[j+n]...u[j]) should be left as the
1473     // true value plus b**(n+1), namely as the b's complement of
1474     // the true value, and a "borrow" to the left should be remembered.
1475     int64_t borrow = 0;
1476     for (unsigned i = 0; i < n; ++i) {
1477       uint64_t p = uint64_t(qp) * uint64_t(v[i]);
1478       int64_t subres = int64_t(u[j+i]) - borrow - (unsigned)p;
1479       u[j+i] = (unsigned)subres;
1480       borrow = (p >> 32) - (subres >> 32);
1481       DEBUG(dbgs() << "KnuthDiv: u[j+i] = " << u[j+i]
1482                    << ", borrow = " << borrow << '\n');
1483     }
1484     bool isNeg = u[j+n] < borrow;
1485     u[j+n] -= (unsigned)borrow;
1486 
1487     DEBUG(dbgs() << "KnuthDiv: after subtraction:");
1488     DEBUG(for (int i = m+n; i >=0; i--) dbgs() << " " << u[i]);
1489     DEBUG(dbgs() << '\n');
1490 
1491     // D5. [Test remainder.] Set q[j] = qp. If the result of step D4 was
1492     // negative, go to step D6; otherwise go on to step D7.
1493     q[j] = (unsigned)qp;
1494     if (isNeg) {
1495       // D6. [Add back]. The probability that this step is necessary is very
1496       // small, on the order of only 2/b. Make sure that test data accounts for
1497       // this possibility. Decrease q[j] by 1
1498       q[j]--;
1499       // and add (0v[n-1]...v[1]v[0]) to (u[j+n]u[j+n-1]...u[j+1]u[j]).
1500       // A carry will occur to the left of u[j+n], and it should be ignored
1501       // since it cancels with the borrow that occurred in D4.
1502       bool carry = false;
1503       for (unsigned i = 0; i < n; i++) {
1504         unsigned limit = std::min(u[j+i],v[i]);
1505         u[j+i] += v[i] + carry;
1506         carry = u[j+i] < limit || (carry && u[j+i] == limit);
1507       }
1508       u[j+n] += carry;
1509     }
1510     DEBUG(dbgs() << "KnuthDiv: after correction:");
1511     DEBUG(for (int i = m+n; i >=0; i--) dbgs() << " " << u[i]);
1512     DEBUG(dbgs() << "\nKnuthDiv: digit result = " << q[j] << '\n');
1513 
1514   // D7. [Loop on j.]  Decrease j by one. Now if j >= 0, go back to D3.
1515   } while (--j >= 0);
1516 
1517   DEBUG(dbgs() << "KnuthDiv: quotient:");
1518   DEBUG(for (int i = m; i >=0; i--) dbgs() <<" " << q[i]);
1519   DEBUG(dbgs() << '\n');
1520 
1521   // D8. [Unnormalize]. Now q[...] is the desired quotient, and the desired
1522   // remainder may be obtained by dividing u[...] by d. If r is non-null we
1523   // compute the remainder (urem uses this).
1524   if (r) {
1525     // The value d is expressed by the "shift" value above since we avoided
1526     // multiplication by d by using a shift left. So, all we have to do is
1527     // shift right here.
1528     if (shift) {
1529       unsigned carry = 0;
1530       DEBUG(dbgs() << "KnuthDiv: remainder:");
1531       for (int i = n-1; i >= 0; i--) {
1532         r[i] = (u[i] >> shift) | carry;
1533         carry = u[i] << (32 - shift);
1534         DEBUG(dbgs() << " " << r[i]);
1535       }
1536     } else {
1537       for (int i = n-1; i >= 0; i--) {
1538         r[i] = u[i];
1539         DEBUG(dbgs() << " " << r[i]);
1540       }
1541     }
1542     DEBUG(dbgs() << '\n');
1543   }
1544   DEBUG(dbgs() << '\n');
1545 }
1546 
1547 void APInt::divide(const APInt &LHS, unsigned lhsWords, const APInt &RHS,
1548                    unsigned rhsWords, APInt *Quotient, APInt *Remainder) {
1549   assert(lhsWords >= rhsWords && "Fractional result");
1550 
1551   // First, compose the values into an array of 32-bit words instead of
1552   // 64-bit words. This is a necessity of both the "short division" algorithm
1553   // and the Knuth "classical algorithm" which requires there to be native
1554   // operations for +, -, and * on an m bit value with an m*2 bit result. We
1555   // can't use 64-bit operands here because we don't have native results of
1556   // 128-bits. Furthermore, casting the 64-bit values to 32-bit values won't
1557   // work on large-endian machines.
1558   uint64_t mask = ~0ull >> (sizeof(unsigned)*CHAR_BIT);
1559   unsigned n = rhsWords * 2;
1560   unsigned m = (lhsWords * 2) - n;
1561 
1562   // Allocate space for the temporary values we need either on the stack, if
1563   // it will fit, or on the heap if it won't.
1564   unsigned SPACE[128];
1565   unsigned *U = nullptr;
1566   unsigned *V = nullptr;
1567   unsigned *Q = nullptr;
1568   unsigned *R = nullptr;
1569   if ((Remainder?4:3)*n+2*m+1 <= 128) {
1570     U = &SPACE[0];
1571     V = &SPACE[m+n+1];
1572     Q = &SPACE[(m+n+1) + n];
1573     if (Remainder)
1574       R = &SPACE[(m+n+1) + n + (m+n)];
1575   } else {
1576     U = new unsigned[m + n + 1];
1577     V = new unsigned[n];
1578     Q = new unsigned[m+n];
1579     if (Remainder)
1580       R = new unsigned[n];
1581   }
1582 
1583   // Initialize the dividend
1584   memset(U, 0, (m+n+1)*sizeof(unsigned));
1585   for (unsigned i = 0; i < lhsWords; ++i) {
1586     uint64_t tmp = (LHS.getNumWords() == 1 ? LHS.VAL : LHS.pVal[i]);
1587     U[i * 2] = (unsigned)(tmp & mask);
1588     U[i * 2 + 1] = (unsigned)(tmp >> (sizeof(unsigned)*CHAR_BIT));
1589   }
1590   U[m+n] = 0; // this extra word is for "spill" in the Knuth algorithm.
1591 
1592   // Initialize the divisor
1593   memset(V, 0, (n)*sizeof(unsigned));
1594   for (unsigned i = 0; i < rhsWords; ++i) {
1595     uint64_t tmp = (RHS.getNumWords() == 1 ? RHS.VAL : RHS.pVal[i]);
1596     V[i * 2] = (unsigned)(tmp & mask);
1597     V[i * 2 + 1] = (unsigned)(tmp >> (sizeof(unsigned)*CHAR_BIT));
1598   }
1599 
1600   // initialize the quotient and remainder
1601   memset(Q, 0, (m+n) * sizeof(unsigned));
1602   if (Remainder)
1603     memset(R, 0, n * sizeof(unsigned));
1604 
1605   // Now, adjust m and n for the Knuth division. n is the number of words in
1606   // the divisor. m is the number of words by which the dividend exceeds the
1607   // divisor (i.e. m+n is the length of the dividend). These sizes must not
1608   // contain any zero words or the Knuth algorithm fails.
1609   for (unsigned i = n; i > 0 && V[i-1] == 0; i--) {
1610     n--;
1611     m++;
1612   }
1613   for (unsigned i = m+n; i > 0 && U[i-1] == 0; i--)
1614     m--;
1615 
1616   // If we're left with only a single word for the divisor, Knuth doesn't work
1617   // so we implement the short division algorithm here. This is much simpler
1618   // and faster because we are certain that we can divide a 64-bit quantity
1619   // by a 32-bit quantity at hardware speed and short division is simply a
1620   // series of such operations. This is just like doing short division but we
1621   // are using base 2^32 instead of base 10.
1622   assert(n != 0 && "Divide by zero?");
1623   if (n == 1) {
1624     unsigned divisor = V[0];
1625     unsigned remainder = 0;
1626     for (int i = m+n-1; i >= 0; i--) {
1627       uint64_t partial_dividend = uint64_t(remainder) << 32 | U[i];
1628       if (partial_dividend == 0) {
1629         Q[i] = 0;
1630         remainder = 0;
1631       } else if (partial_dividend < divisor) {
1632         Q[i] = 0;
1633         remainder = (unsigned)partial_dividend;
1634       } else if (partial_dividend == divisor) {
1635         Q[i] = 1;
1636         remainder = 0;
1637       } else {
1638         Q[i] = (unsigned)(partial_dividend / divisor);
1639         remainder = (unsigned)(partial_dividend - (Q[i] * divisor));
1640       }
1641     }
1642     if (R)
1643       R[0] = remainder;
1644   } else {
1645     // Now we're ready to invoke the Knuth classical divide algorithm. In this
1646     // case n > 1.
1647     KnuthDiv(U, V, Q, R, m, n);
1648   }
1649 
1650   // If the caller wants the quotient
1651   if (Quotient) {
1652     // Set up the Quotient value's memory.
1653     if (Quotient->BitWidth != LHS.BitWidth) {
1654       if (Quotient->isSingleWord())
1655         Quotient->VAL = 0;
1656       else
1657         delete [] Quotient->pVal;
1658       Quotient->BitWidth = LHS.BitWidth;
1659       if (!Quotient->isSingleWord())
1660         Quotient->pVal = getClearedMemory(Quotient->getNumWords());
1661     } else
1662       Quotient->clearAllBits();
1663 
1664     // The quotient is in Q. Reconstitute the quotient into Quotient's low
1665     // order words.
1666     // This case is currently dead as all users of divide() handle trivial cases
1667     // earlier.
1668     if (lhsWords == 1) {
1669       uint64_t tmp =
1670         uint64_t(Q[0]) | (uint64_t(Q[1]) << (APINT_BITS_PER_WORD / 2));
1671       if (Quotient->isSingleWord())
1672         Quotient->VAL = tmp;
1673       else
1674         Quotient->pVal[0] = tmp;
1675     } else {
1676       assert(!Quotient->isSingleWord() && "Quotient APInt not large enough");
1677       for (unsigned i = 0; i < lhsWords; ++i)
1678         Quotient->pVal[i] =
1679           uint64_t(Q[i*2]) | (uint64_t(Q[i*2+1]) << (APINT_BITS_PER_WORD / 2));
1680     }
1681   }
1682 
1683   // If the caller wants the remainder
1684   if (Remainder) {
1685     // Set up the Remainder value's memory.
1686     if (Remainder->BitWidth != RHS.BitWidth) {
1687       if (Remainder->isSingleWord())
1688         Remainder->VAL = 0;
1689       else
1690         delete [] Remainder->pVal;
1691       Remainder->BitWidth = RHS.BitWidth;
1692       if (!Remainder->isSingleWord())
1693         Remainder->pVal = getClearedMemory(Remainder->getNumWords());
1694     } else
1695       Remainder->clearAllBits();
1696 
1697     // The remainder is in R. Reconstitute the remainder into Remainder's low
1698     // order words.
1699     if (rhsWords == 1) {
1700       uint64_t tmp =
1701         uint64_t(R[0]) | (uint64_t(R[1]) << (APINT_BITS_PER_WORD / 2));
1702       if (Remainder->isSingleWord())
1703         Remainder->VAL = tmp;
1704       else
1705         Remainder->pVal[0] = tmp;
1706     } else {
1707       assert(!Remainder->isSingleWord() && "Remainder APInt not large enough");
1708       for (unsigned i = 0; i < rhsWords; ++i)
1709         Remainder->pVal[i] =
1710           uint64_t(R[i*2]) | (uint64_t(R[i*2+1]) << (APINT_BITS_PER_WORD / 2));
1711     }
1712   }
1713 
1714   // Clean up the memory we allocated.
1715   if (U != &SPACE[0]) {
1716     delete [] U;
1717     delete [] V;
1718     delete [] Q;
1719     delete [] R;
1720   }
1721 }
1722 
1723 APInt APInt::udiv(const APInt& RHS) const {
1724   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
1725 
1726   // First, deal with the easy case
1727   if (isSingleWord()) {
1728     assert(RHS.VAL != 0 && "Divide by zero?");
1729     return APInt(BitWidth, VAL / RHS.VAL);
1730   }
1731 
1732   // Get some facts about the LHS and RHS number of bits and words
1733   unsigned rhsBits = RHS.getActiveBits();
1734   unsigned rhsWords = !rhsBits ? 0 : (APInt::whichWord(rhsBits - 1) + 1);
1735   assert(rhsWords && "Divided by zero???");
1736   unsigned lhsBits = this->getActiveBits();
1737   unsigned lhsWords = !lhsBits ? 0 : (APInt::whichWord(lhsBits - 1) + 1);
1738 
1739   // Deal with some degenerate cases
1740   if (!lhsWords)
1741     // 0 / X ===> 0
1742     return APInt(BitWidth, 0);
1743   else if (lhsWords < rhsWords || this->ult(RHS)) {
1744     // X / Y ===> 0, iff X < Y
1745     return APInt(BitWidth, 0);
1746   } else if (*this == RHS) {
1747     // X / X ===> 1
1748     return APInt(BitWidth, 1);
1749   } else if (lhsWords == 1 && rhsWords == 1) {
1750     // All high words are zero, just use native divide
1751     return APInt(BitWidth, this->pVal[0] / RHS.pVal[0]);
1752   }
1753 
1754   // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1755   APInt Quotient(1,0); // to hold result.
1756   divide(*this, lhsWords, RHS, rhsWords, &Quotient, nullptr);
1757   return Quotient;
1758 }
1759 
1760 APInt APInt::sdiv(const APInt &RHS) const {
1761   if (isNegative()) {
1762     if (RHS.isNegative())
1763       return (-(*this)).udiv(-RHS);
1764     return -((-(*this)).udiv(RHS));
1765   }
1766   if (RHS.isNegative())
1767     return -(this->udiv(-RHS));
1768   return this->udiv(RHS);
1769 }
1770 
1771 APInt APInt::urem(const APInt& RHS) const {
1772   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
1773   if (isSingleWord()) {
1774     assert(RHS.VAL != 0 && "Remainder by zero?");
1775     return APInt(BitWidth, VAL % RHS.VAL);
1776   }
1777 
1778   // Get some facts about the LHS
1779   unsigned lhsBits = getActiveBits();
1780   unsigned lhsWords = !lhsBits ? 0 : (whichWord(lhsBits - 1) + 1);
1781 
1782   // Get some facts about the RHS
1783   unsigned rhsBits = RHS.getActiveBits();
1784   unsigned rhsWords = !rhsBits ? 0 : (APInt::whichWord(rhsBits - 1) + 1);
1785   assert(rhsWords && "Performing remainder operation by zero ???");
1786 
1787   // Check the degenerate cases
1788   if (lhsWords == 0) {
1789     // 0 % Y ===> 0
1790     return APInt(BitWidth, 0);
1791   } else if (lhsWords < rhsWords || this->ult(RHS)) {
1792     // X % Y ===> X, iff X < Y
1793     return *this;
1794   } else if (*this == RHS) {
1795     // X % X == 0;
1796     return APInt(BitWidth, 0);
1797   } else if (lhsWords == 1) {
1798     // All high words are zero, just use native remainder
1799     return APInt(BitWidth, pVal[0] % RHS.pVal[0]);
1800   }
1801 
1802   // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1803   APInt Remainder(1,0);
1804   divide(*this, lhsWords, RHS, rhsWords, nullptr, &Remainder);
1805   return Remainder;
1806 }
1807 
1808 APInt APInt::srem(const APInt &RHS) const {
1809   if (isNegative()) {
1810     if (RHS.isNegative())
1811       return -((-(*this)).urem(-RHS));
1812     return -((-(*this)).urem(RHS));
1813   }
1814   if (RHS.isNegative())
1815     return this->urem(-RHS);
1816   return this->urem(RHS);
1817 }
1818 
1819 void APInt::udivrem(const APInt &LHS, const APInt &RHS,
1820                     APInt &Quotient, APInt &Remainder) {
1821   assert(LHS.BitWidth == RHS.BitWidth && "Bit widths must be the same");
1822 
1823   // First, deal with the easy case
1824   if (LHS.isSingleWord()) {
1825     assert(RHS.VAL != 0 && "Divide by zero?");
1826     uint64_t QuotVal = LHS.VAL / RHS.VAL;
1827     uint64_t RemVal = LHS.VAL % RHS.VAL;
1828     Quotient = APInt(LHS.BitWidth, QuotVal);
1829     Remainder = APInt(LHS.BitWidth, RemVal);
1830     return;
1831   }
1832 
1833   // Get some size facts about the dividend and divisor
1834   unsigned lhsBits  = LHS.getActiveBits();
1835   unsigned lhsWords = !lhsBits ? 0 : (APInt::whichWord(lhsBits - 1) + 1);
1836   unsigned rhsBits  = RHS.getActiveBits();
1837   unsigned rhsWords = !rhsBits ? 0 : (APInt::whichWord(rhsBits - 1) + 1);
1838 
1839   // Check the degenerate cases
1840   if (lhsWords == 0) {
1841     Quotient = 0;                // 0 / Y ===> 0
1842     Remainder = 0;               // 0 % Y ===> 0
1843     return;
1844   }
1845 
1846   if (lhsWords < rhsWords || LHS.ult(RHS)) {
1847     Remainder = LHS;            // X % Y ===> X, iff X < Y
1848     Quotient = 0;               // X / Y ===> 0, iff X < Y
1849     return;
1850   }
1851 
1852   if (LHS == RHS) {
1853     Quotient  = 1;              // X / X ===> 1
1854     Remainder = 0;              // X % X ===> 0;
1855     return;
1856   }
1857 
1858   if (lhsWords == 1 && rhsWords == 1) {
1859     // There is only one word to consider so use the native versions.
1860     uint64_t lhsValue = LHS.isSingleWord() ? LHS.VAL : LHS.pVal[0];
1861     uint64_t rhsValue = RHS.isSingleWord() ? RHS.VAL : RHS.pVal[0];
1862     Quotient = APInt(LHS.getBitWidth(), lhsValue / rhsValue);
1863     Remainder = APInt(LHS.getBitWidth(), lhsValue % rhsValue);
1864     return;
1865   }
1866 
1867   // Okay, lets do it the long way
1868   divide(LHS, lhsWords, RHS, rhsWords, &Quotient, &Remainder);
1869 }
1870 
1871 void APInt::sdivrem(const APInt &LHS, const APInt &RHS,
1872                     APInt &Quotient, APInt &Remainder) {
1873   if (LHS.isNegative()) {
1874     if (RHS.isNegative())
1875       APInt::udivrem(-LHS, -RHS, Quotient, Remainder);
1876     else {
1877       APInt::udivrem(-LHS, RHS, Quotient, Remainder);
1878       Quotient = -Quotient;
1879     }
1880     Remainder = -Remainder;
1881   } else if (RHS.isNegative()) {
1882     APInt::udivrem(LHS, -RHS, Quotient, Remainder);
1883     Quotient = -Quotient;
1884   } else {
1885     APInt::udivrem(LHS, RHS, Quotient, Remainder);
1886   }
1887 }
1888 
1889 APInt APInt::sadd_ov(const APInt &RHS, bool &Overflow) const {
1890   APInt Res = *this+RHS;
1891   Overflow = isNonNegative() == RHS.isNonNegative() &&
1892              Res.isNonNegative() != isNonNegative();
1893   return Res;
1894 }
1895 
1896 APInt APInt::uadd_ov(const APInt &RHS, bool &Overflow) const {
1897   APInt Res = *this+RHS;
1898   Overflow = Res.ult(RHS);
1899   return Res;
1900 }
1901 
1902 APInt APInt::ssub_ov(const APInt &RHS, bool &Overflow) const {
1903   APInt Res = *this - RHS;
1904   Overflow = isNonNegative() != RHS.isNonNegative() &&
1905              Res.isNonNegative() != isNonNegative();
1906   return Res;
1907 }
1908 
1909 APInt APInt::usub_ov(const APInt &RHS, bool &Overflow) const {
1910   APInt Res = *this-RHS;
1911   Overflow = Res.ugt(*this);
1912   return Res;
1913 }
1914 
1915 APInt APInt::sdiv_ov(const APInt &RHS, bool &Overflow) const {
1916   // MININT/-1  -->  overflow.
1917   Overflow = isMinSignedValue() && RHS.isAllOnesValue();
1918   return sdiv(RHS);
1919 }
1920 
1921 APInt APInt::smul_ov(const APInt &RHS, bool &Overflow) const {
1922   APInt Res = *this * RHS;
1923 
1924   if (*this != 0 && RHS != 0)
1925     Overflow = Res.sdiv(RHS) != *this || Res.sdiv(*this) != RHS;
1926   else
1927     Overflow = false;
1928   return Res;
1929 }
1930 
1931 APInt APInt::umul_ov(const APInt &RHS, bool &Overflow) const {
1932   APInt Res = *this * RHS;
1933 
1934   if (*this != 0 && RHS != 0)
1935     Overflow = Res.udiv(RHS) != *this || Res.udiv(*this) != RHS;
1936   else
1937     Overflow = false;
1938   return Res;
1939 }
1940 
1941 APInt APInt::sshl_ov(const APInt &ShAmt, bool &Overflow) const {
1942   Overflow = ShAmt.uge(getBitWidth());
1943   if (Overflow)
1944     return APInt(BitWidth, 0);
1945 
1946   if (isNonNegative()) // Don't allow sign change.
1947     Overflow = ShAmt.uge(countLeadingZeros());
1948   else
1949     Overflow = ShAmt.uge(countLeadingOnes());
1950 
1951   return *this << ShAmt;
1952 }
1953 
1954 APInt APInt::ushl_ov(const APInt &ShAmt, bool &Overflow) const {
1955   Overflow = ShAmt.uge(getBitWidth());
1956   if (Overflow)
1957     return APInt(BitWidth, 0);
1958 
1959   Overflow = ShAmt.ugt(countLeadingZeros());
1960 
1961   return *this << ShAmt;
1962 }
1963 
1964 
1965 
1966 
1967 void APInt::fromString(unsigned numbits, StringRef str, uint8_t radix) {
1968   // Check our assumptions here
1969   assert(!str.empty() && "Invalid string length");
1970   assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 ||
1971           radix == 36) &&
1972          "Radix should be 2, 8, 10, 16, or 36!");
1973 
1974   StringRef::iterator p = str.begin();
1975   size_t slen = str.size();
1976   bool isNeg = *p == '-';
1977   if (*p == '-' || *p == '+') {
1978     p++;
1979     slen--;
1980     assert(slen && "String is only a sign, needs a value.");
1981   }
1982   assert((slen <= numbits || radix != 2) && "Insufficient bit width");
1983   assert(((slen-1)*3 <= numbits || radix != 8) && "Insufficient bit width");
1984   assert(((slen-1)*4 <= numbits || radix != 16) && "Insufficient bit width");
1985   assert((((slen-1)*64)/22 <= numbits || radix != 10) &&
1986          "Insufficient bit width");
1987 
1988   // Allocate memory
1989   if (!isSingleWord())
1990     pVal = getClearedMemory(getNumWords());
1991 
1992   // Figure out if we can shift instead of multiply
1993   unsigned shift = (radix == 16 ? 4 : radix == 8 ? 3 : radix == 2 ? 1 : 0);
1994 
1995   // Set up an APInt for the radix multiplier outside the loop so we don't
1996   // constantly construct/destruct it.
1997   APInt apradix(getBitWidth(), radix);
1998 
1999   // Enter digit traversal loop
2000   for (StringRef::iterator e = str.end(); p != e; ++p) {
2001     unsigned digit = getDigit(*p, radix);
2002     assert(digit < radix && "Invalid character in digit string");
2003 
2004     // Shift or multiply the value by the radix
2005     if (slen > 1) {
2006       if (shift)
2007         *this <<= shift;
2008       else
2009         *this *= apradix;
2010     }
2011 
2012     // Add in the digit we just interpreted
2013     *this += digit;
2014   }
2015   // If its negative, put it in two's complement form
2016   if (isNeg) {
2017     --(*this);
2018     this->flipAllBits();
2019   }
2020 }
2021 
2022 void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix,
2023                      bool Signed, bool formatAsCLiteral) const {
2024   assert((Radix == 10 || Radix == 8 || Radix == 16 || Radix == 2 ||
2025           Radix == 36) &&
2026          "Radix should be 2, 8, 10, 16, or 36!");
2027 
2028   const char *Prefix = "";
2029   if (formatAsCLiteral) {
2030     switch (Radix) {
2031       case 2:
2032         // Binary literals are a non-standard extension added in gcc 4.3:
2033         // http://gcc.gnu.org/onlinedocs/gcc-4.3.0/gcc/Binary-constants.html
2034         Prefix = "0b";
2035         break;
2036       case 8:
2037         Prefix = "0";
2038         break;
2039       case 10:
2040         break; // No prefix
2041       case 16:
2042         Prefix = "0x";
2043         break;
2044       default:
2045         llvm_unreachable("Invalid radix!");
2046     }
2047   }
2048 
2049   // First, check for a zero value and just short circuit the logic below.
2050   if (*this == 0) {
2051     while (*Prefix) {
2052       Str.push_back(*Prefix);
2053       ++Prefix;
2054     };
2055     Str.push_back('0');
2056     return;
2057   }
2058 
2059   static const char Digits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
2060 
2061   if (isSingleWord()) {
2062     char Buffer[65];
2063     char *BufPtr = Buffer+65;
2064 
2065     uint64_t N;
2066     if (!Signed) {
2067       N = getZExtValue();
2068     } else {
2069       int64_t I = getSExtValue();
2070       if (I >= 0) {
2071         N = I;
2072       } else {
2073         Str.push_back('-');
2074         N = -(uint64_t)I;
2075       }
2076     }
2077 
2078     while (*Prefix) {
2079       Str.push_back(*Prefix);
2080       ++Prefix;
2081     };
2082 
2083     while (N) {
2084       *--BufPtr = Digits[N % Radix];
2085       N /= Radix;
2086     }
2087     Str.append(BufPtr, Buffer+65);
2088     return;
2089   }
2090 
2091   APInt Tmp(*this);
2092 
2093   if (Signed && isNegative()) {
2094     // They want to print the signed version and it is a negative value
2095     // Flip the bits and add one to turn it into the equivalent positive
2096     // value and put a '-' in the result.
2097     Tmp.flipAllBits();
2098     ++Tmp;
2099     Str.push_back('-');
2100   }
2101 
2102   while (*Prefix) {
2103     Str.push_back(*Prefix);
2104     ++Prefix;
2105   };
2106 
2107   // We insert the digits backward, then reverse them to get the right order.
2108   unsigned StartDig = Str.size();
2109 
2110   // For the 2, 8 and 16 bit cases, we can just shift instead of divide
2111   // because the number of bits per digit (1, 3 and 4 respectively) divides
2112   // equally.  We just shift until the value is zero.
2113   if (Radix == 2 || Radix == 8 || Radix == 16) {
2114     // Just shift tmp right for each digit width until it becomes zero
2115     unsigned ShiftAmt = (Radix == 16 ? 4 : (Radix == 8 ? 3 : 1));
2116     unsigned MaskAmt = Radix - 1;
2117 
2118     while (Tmp != 0) {
2119       unsigned Digit = unsigned(Tmp.getRawData()[0]) & MaskAmt;
2120       Str.push_back(Digits[Digit]);
2121       Tmp.lshrInPlace(ShiftAmt);
2122     }
2123   } else {
2124     APInt divisor(Radix == 10? 4 : 8, Radix);
2125     while (Tmp != 0) {
2126       APInt APdigit(1, 0);
2127       APInt tmp2(Tmp.getBitWidth(), 0);
2128       divide(Tmp, Tmp.getNumWords(), divisor, divisor.getNumWords(), &tmp2,
2129              &APdigit);
2130       unsigned Digit = (unsigned)APdigit.getZExtValue();
2131       assert(Digit < Radix && "divide failed");
2132       Str.push_back(Digits[Digit]);
2133       Tmp = tmp2;
2134     }
2135   }
2136 
2137   // Reverse the digits before returning.
2138   std::reverse(Str.begin()+StartDig, Str.end());
2139 }
2140 
2141 /// Returns the APInt as a std::string. Note that this is an inefficient method.
2142 /// It is better to pass in a SmallVector/SmallString to the methods above.
2143 std::string APInt::toString(unsigned Radix = 10, bool Signed = true) const {
2144   SmallString<40> S;
2145   toString(S, Radix, Signed, /* formatAsCLiteral = */false);
2146   return S.str();
2147 }
2148 
2149 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2150 LLVM_DUMP_METHOD void APInt::dump() const {
2151   SmallString<40> S, U;
2152   this->toStringUnsigned(U);
2153   this->toStringSigned(S);
2154   dbgs() << "APInt(" << BitWidth << "b, "
2155          << U << "u " << S << "s)\n";
2156 }
2157 #endif
2158 
2159 void APInt::print(raw_ostream &OS, bool isSigned) const {
2160   SmallString<40> S;
2161   this->toString(S, 10, isSigned, /* formatAsCLiteral = */false);
2162   OS << S;
2163 }
2164 
2165 // This implements a variety of operations on a representation of
2166 // arbitrary precision, two's-complement, bignum integer values.
2167 
2168 // Assumed by lowHalf, highHalf, partMSB and partLSB.  A fairly safe
2169 // and unrestricting assumption.
2170 static_assert(APInt::APINT_BITS_PER_WORD % 2 == 0,
2171               "Part width must be divisible by 2!");
2172 
2173 /* Some handy functions local to this file.  */
2174 
2175 /* Returns the integer part with the least significant BITS set.
2176    BITS cannot be zero.  */
2177 static inline APInt::WordType lowBitMask(unsigned bits) {
2178   assert(bits != 0 && bits <= APInt::APINT_BITS_PER_WORD);
2179 
2180   return ~(APInt::WordType) 0 >> (APInt::APINT_BITS_PER_WORD - bits);
2181 }
2182 
2183 /* Returns the value of the lower half of PART.  */
2184 static inline APInt::WordType lowHalf(APInt::WordType part) {
2185   return part & lowBitMask(APInt::APINT_BITS_PER_WORD / 2);
2186 }
2187 
2188 /* Returns the value of the upper half of PART.  */
2189 static inline APInt::WordType highHalf(APInt::WordType part) {
2190   return part >> (APInt::APINT_BITS_PER_WORD / 2);
2191 }
2192 
2193 /* Returns the bit number of the most significant set bit of a part.
2194    If the input number has no bits set -1U is returned.  */
2195 static unsigned partMSB(APInt::WordType value) {
2196   return findLastSet(value, ZB_Max);
2197 }
2198 
2199 /* Returns the bit number of the least significant set bit of a
2200    part.  If the input number has no bits set -1U is returned.  */
2201 static unsigned partLSB(APInt::WordType value) {
2202   return findFirstSet(value, ZB_Max);
2203 }
2204 
2205 /* Sets the least significant part of a bignum to the input value, and
2206    zeroes out higher parts.  */
2207 void APInt::tcSet(WordType *dst, WordType part, unsigned parts) {
2208   assert(parts > 0);
2209 
2210   dst[0] = part;
2211   for (unsigned i = 1; i < parts; i++)
2212     dst[i] = 0;
2213 }
2214 
2215 /* Assign one bignum to another.  */
2216 void APInt::tcAssign(WordType *dst, const WordType *src, unsigned parts) {
2217   for (unsigned i = 0; i < parts; i++)
2218     dst[i] = src[i];
2219 }
2220 
2221 /* Returns true if a bignum is zero, false otherwise.  */
2222 bool APInt::tcIsZero(const WordType *src, unsigned parts) {
2223   for (unsigned i = 0; i < parts; i++)
2224     if (src[i])
2225       return false;
2226 
2227   return true;
2228 }
2229 
2230 /* Extract the given bit of a bignum; returns 0 or 1.  */
2231 int APInt::tcExtractBit(const WordType *parts, unsigned bit) {
2232   return (parts[whichWord(bit)] & maskBit(bit)) != 0;
2233 }
2234 
2235 /* Set the given bit of a bignum. */
2236 void APInt::tcSetBit(WordType *parts, unsigned bit) {
2237   parts[whichWord(bit)] |= maskBit(bit);
2238 }
2239 
2240 /* Clears the given bit of a bignum. */
2241 void APInt::tcClearBit(WordType *parts, unsigned bit) {
2242   parts[whichWord(bit)] &= ~maskBit(bit);
2243 }
2244 
2245 /* Returns the bit number of the least significant set bit of a
2246    number.  If the input number has no bits set -1U is returned.  */
2247 unsigned APInt::tcLSB(const WordType *parts, unsigned n) {
2248   for (unsigned i = 0; i < n; i++) {
2249     if (parts[i] != 0) {
2250       unsigned lsb = partLSB(parts[i]);
2251 
2252       return lsb + i * APINT_BITS_PER_WORD;
2253     }
2254   }
2255 
2256   return -1U;
2257 }
2258 
2259 /* Returns the bit number of the most significant set bit of a number.
2260    If the input number has no bits set -1U is returned.  */
2261 unsigned APInt::tcMSB(const WordType *parts, unsigned n) {
2262   do {
2263     --n;
2264 
2265     if (parts[n] != 0) {
2266       unsigned msb = partMSB(parts[n]);
2267 
2268       return msb + n * APINT_BITS_PER_WORD;
2269     }
2270   } while (n);
2271 
2272   return -1U;
2273 }
2274 
2275 /* Copy the bit vector of width srcBITS from SRC, starting at bit
2276    srcLSB, to DST, of dstCOUNT parts, such that the bit srcLSB becomes
2277    the least significant bit of DST.  All high bits above srcBITS in
2278    DST are zero-filled.  */
2279 void
2280 APInt::tcExtract(WordType *dst, unsigned dstCount, const WordType *src,
2281                  unsigned srcBits, unsigned srcLSB) {
2282   unsigned dstParts = (srcBits + APINT_BITS_PER_WORD - 1) / APINT_BITS_PER_WORD;
2283   assert(dstParts <= dstCount);
2284 
2285   unsigned firstSrcPart = srcLSB / APINT_BITS_PER_WORD;
2286   tcAssign (dst, src + firstSrcPart, dstParts);
2287 
2288   unsigned shift = srcLSB % APINT_BITS_PER_WORD;
2289   tcShiftRight (dst, dstParts, shift);
2290 
2291   /* We now have (dstParts * APINT_BITS_PER_WORD - shift) bits from SRC
2292      in DST.  If this is less that srcBits, append the rest, else
2293      clear the high bits.  */
2294   unsigned n = dstParts * APINT_BITS_PER_WORD - shift;
2295   if (n < srcBits) {
2296     WordType mask = lowBitMask (srcBits - n);
2297     dst[dstParts - 1] |= ((src[firstSrcPart + dstParts] & mask)
2298                           << n % APINT_BITS_PER_WORD);
2299   } else if (n > srcBits) {
2300     if (srcBits % APINT_BITS_PER_WORD)
2301       dst[dstParts - 1] &= lowBitMask (srcBits % APINT_BITS_PER_WORD);
2302   }
2303 
2304   /* Clear high parts.  */
2305   while (dstParts < dstCount)
2306     dst[dstParts++] = 0;
2307 }
2308 
2309 /* DST += RHS + C where C is zero or one.  Returns the carry flag.  */
2310 APInt::WordType APInt::tcAdd(WordType *dst, const WordType *rhs,
2311                              WordType c, unsigned parts) {
2312   assert(c <= 1);
2313 
2314   for (unsigned i = 0; i < parts; i++) {
2315     WordType l = dst[i];
2316     if (c) {
2317       dst[i] += rhs[i] + 1;
2318       c = (dst[i] <= l);
2319     } else {
2320       dst[i] += rhs[i];
2321       c = (dst[i] < l);
2322     }
2323   }
2324 
2325   return c;
2326 }
2327 
2328 /// This function adds a single "word" integer, src, to the multiple
2329 /// "word" integer array, dst[]. dst[] is modified to reflect the addition and
2330 /// 1 is returned if there is a carry out, otherwise 0 is returned.
2331 /// @returns the carry of the addition.
2332 APInt::WordType APInt::tcAddPart(WordType *dst, WordType src,
2333                                  unsigned parts) {
2334   for (unsigned i = 0; i < parts; ++i) {
2335     dst[i] += src;
2336     if (dst[i] >= src)
2337       return 0; // No need to carry so exit early.
2338     src = 1; // Carry one to next digit.
2339   }
2340 
2341   return 1;
2342 }
2343 
2344 /* DST -= RHS + C where C is zero or one.  Returns the carry flag.  */
2345 APInt::WordType APInt::tcSubtract(WordType *dst, const WordType *rhs,
2346                                   WordType c, unsigned parts) {
2347   assert(c <= 1);
2348 
2349   for (unsigned i = 0; i < parts; i++) {
2350     WordType l = dst[i];
2351     if (c) {
2352       dst[i] -= rhs[i] + 1;
2353       c = (dst[i] >= l);
2354     } else {
2355       dst[i] -= rhs[i];
2356       c = (dst[i] > l);
2357     }
2358   }
2359 
2360   return c;
2361 }
2362 
2363 /// This function subtracts a single "word" (64-bit word), src, from
2364 /// the multi-word integer array, dst[], propagating the borrowed 1 value until
2365 /// no further borrowing is needed or it runs out of "words" in dst.  The result
2366 /// is 1 if "borrowing" exhausted the digits in dst, or 0 if dst was not
2367 /// exhausted. In other words, if src > dst then this function returns 1,
2368 /// otherwise 0.
2369 /// @returns the borrow out of the subtraction
2370 APInt::WordType APInt::tcSubtractPart(WordType *dst, WordType src,
2371                                       unsigned parts) {
2372   for (unsigned i = 0; i < parts; ++i) {
2373     WordType Dst = dst[i];
2374     dst[i] -= src;
2375     if (src <= Dst)
2376       return 0; // No need to borrow so exit early.
2377     src = 1; // We have to "borrow 1" from next "word"
2378   }
2379 
2380   return 1;
2381 }
2382 
2383 /* Negate a bignum in-place.  */
2384 void APInt::tcNegate(WordType *dst, unsigned parts) {
2385   tcComplement(dst, parts);
2386   tcIncrement(dst, parts);
2387 }
2388 
2389 /*  DST += SRC * MULTIPLIER + CARRY   if add is true
2390     DST  = SRC * MULTIPLIER + CARRY   if add is false
2391 
2392     Requires 0 <= DSTPARTS <= SRCPARTS + 1.  If DST overlaps SRC
2393     they must start at the same point, i.e. DST == SRC.
2394 
2395     If DSTPARTS == SRCPARTS + 1 no overflow occurs and zero is
2396     returned.  Otherwise DST is filled with the least significant
2397     DSTPARTS parts of the result, and if all of the omitted higher
2398     parts were zero return zero, otherwise overflow occurred and
2399     return one.  */
2400 int APInt::tcMultiplyPart(WordType *dst, const WordType *src,
2401                           WordType multiplier, WordType carry,
2402                           unsigned srcParts, unsigned dstParts,
2403                           bool add) {
2404   /* Otherwise our writes of DST kill our later reads of SRC.  */
2405   assert(dst <= src || dst >= src + srcParts);
2406   assert(dstParts <= srcParts + 1);
2407 
2408   /* N loops; minimum of dstParts and srcParts.  */
2409   unsigned n = dstParts < srcParts ? dstParts: srcParts;
2410 
2411   unsigned i;
2412   for (i = 0; i < n; i++) {
2413     WordType low, mid, high, srcPart;
2414 
2415       /* [ LOW, HIGH ] = MULTIPLIER * SRC[i] + DST[i] + CARRY.
2416 
2417          This cannot overflow, because
2418 
2419          (n - 1) * (n - 1) + 2 (n - 1) = (n - 1) * (n + 1)
2420 
2421          which is less than n^2.  */
2422 
2423     srcPart = src[i];
2424 
2425     if (multiplier == 0 || srcPart == 0) {
2426       low = carry;
2427       high = 0;
2428     } else {
2429       low = lowHalf(srcPart) * lowHalf(multiplier);
2430       high = highHalf(srcPart) * highHalf(multiplier);
2431 
2432       mid = lowHalf(srcPart) * highHalf(multiplier);
2433       high += highHalf(mid);
2434       mid <<= APINT_BITS_PER_WORD / 2;
2435       if (low + mid < low)
2436         high++;
2437       low += mid;
2438 
2439       mid = highHalf(srcPart) * lowHalf(multiplier);
2440       high += highHalf(mid);
2441       mid <<= APINT_BITS_PER_WORD / 2;
2442       if (low + mid < low)
2443         high++;
2444       low += mid;
2445 
2446       /* Now add carry.  */
2447       if (low + carry < low)
2448         high++;
2449       low += carry;
2450     }
2451 
2452     if (add) {
2453       /* And now DST[i], and store the new low part there.  */
2454       if (low + dst[i] < low)
2455         high++;
2456       dst[i] += low;
2457     } else
2458       dst[i] = low;
2459 
2460     carry = high;
2461   }
2462 
2463   if (i < dstParts) {
2464     /* Full multiplication, there is no overflow.  */
2465     assert(i + 1 == dstParts);
2466     dst[i] = carry;
2467     return 0;
2468   } else {
2469     /* We overflowed if there is carry.  */
2470     if (carry)
2471       return 1;
2472 
2473     /* We would overflow if any significant unwritten parts would be
2474        non-zero.  This is true if any remaining src parts are non-zero
2475        and the multiplier is non-zero.  */
2476     if (multiplier)
2477       for (; i < srcParts; i++)
2478         if (src[i])
2479           return 1;
2480 
2481     /* We fitted in the narrow destination.  */
2482     return 0;
2483   }
2484 }
2485 
2486 /* DST = LHS * RHS, where DST has the same width as the operands and
2487    is filled with the least significant parts of the result.  Returns
2488    one if overflow occurred, otherwise zero.  DST must be disjoint
2489    from both operands.  */
2490 int APInt::tcMultiply(WordType *dst, const WordType *lhs,
2491                       const WordType *rhs, unsigned parts) {
2492   assert(dst != lhs && dst != rhs);
2493 
2494   int overflow = 0;
2495   tcSet(dst, 0, parts);
2496 
2497   for (unsigned i = 0; i < parts; i++)
2498     overflow |= tcMultiplyPart(&dst[i], lhs, rhs[i], 0, parts,
2499                                parts - i, true);
2500 
2501   return overflow;
2502 }
2503 
2504 /* DST = LHS * RHS, where DST has width the sum of the widths of the
2505    operands.  No overflow occurs.  DST must be disjoint from both
2506    operands.  Returns the number of parts required to hold the
2507    result.  */
2508 unsigned APInt::tcFullMultiply(WordType *dst, const WordType *lhs,
2509                                const WordType *rhs, unsigned lhsParts,
2510                                unsigned rhsParts) {
2511   /* Put the narrower number on the LHS for less loops below.  */
2512   if (lhsParts > rhsParts) {
2513     return tcFullMultiply (dst, rhs, lhs, rhsParts, lhsParts);
2514   } else {
2515     assert(dst != lhs && dst != rhs);
2516 
2517     tcSet(dst, 0, rhsParts);
2518 
2519     for (unsigned i = 0; i < lhsParts; i++)
2520       tcMultiplyPart(&dst[i], rhs, lhs[i], 0, rhsParts, rhsParts + 1, true);
2521 
2522     unsigned n = lhsParts + rhsParts;
2523 
2524     return n - (dst[n - 1] == 0);
2525   }
2526 }
2527 
2528 /* If RHS is zero LHS and REMAINDER are left unchanged, return one.
2529    Otherwise set LHS to LHS / RHS with the fractional part discarded,
2530    set REMAINDER to the remainder, return zero.  i.e.
2531 
2532    OLD_LHS = RHS * LHS + REMAINDER
2533 
2534    SCRATCH is a bignum of the same size as the operands and result for
2535    use by the routine; its contents need not be initialized and are
2536    destroyed.  LHS, REMAINDER and SCRATCH must be distinct.
2537 */
2538 int APInt::tcDivide(WordType *lhs, const WordType *rhs,
2539                     WordType *remainder, WordType *srhs,
2540                     unsigned parts) {
2541   assert(lhs != remainder && lhs != srhs && remainder != srhs);
2542 
2543   unsigned shiftCount = tcMSB(rhs, parts) + 1;
2544   if (shiftCount == 0)
2545     return true;
2546 
2547   shiftCount = parts * APINT_BITS_PER_WORD - shiftCount;
2548   unsigned n = shiftCount / APINT_BITS_PER_WORD;
2549   WordType mask = (WordType) 1 << (shiftCount % APINT_BITS_PER_WORD);
2550 
2551   tcAssign(srhs, rhs, parts);
2552   tcShiftLeft(srhs, parts, shiftCount);
2553   tcAssign(remainder, lhs, parts);
2554   tcSet(lhs, 0, parts);
2555 
2556   /* Loop, subtracting SRHS if REMAINDER is greater and adding that to
2557      the total.  */
2558   for (;;) {
2559       int compare;
2560 
2561       compare = tcCompare(remainder, srhs, parts);
2562       if (compare >= 0) {
2563         tcSubtract(remainder, srhs, 0, parts);
2564         lhs[n] |= mask;
2565       }
2566 
2567       if (shiftCount == 0)
2568         break;
2569       shiftCount--;
2570       tcShiftRight(srhs, parts, 1);
2571       if ((mask >>= 1) == 0) {
2572         mask = (WordType) 1 << (APINT_BITS_PER_WORD - 1);
2573         n--;
2574       }
2575   }
2576 
2577   return false;
2578 }
2579 
2580 /// Shift a bignum left Cound bits in-place. Shifted in bits are zero. There are
2581 /// no restrictions on Count.
2582 void APInt::tcShiftLeft(WordType *Dst, unsigned Words, unsigned Count) {
2583   // Don't bother performing a no-op shift.
2584   if (!Count)
2585     return;
2586 
2587   /* WordShift is the inter-part shift; BitShift is is intra-part shift.  */
2588   unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words);
2589   unsigned BitShift = Count % APINT_BITS_PER_WORD;
2590 
2591   // Fastpath for moving by whole words.
2592   if (BitShift == 0) {
2593     std::memmove(Dst + WordShift, Dst, (Words - WordShift) * APINT_WORD_SIZE);
2594   } else {
2595     while (Words-- > WordShift) {
2596       Dst[Words] = Dst[Words - WordShift] << BitShift;
2597       if (Words > WordShift)
2598         Dst[Words] |=
2599           Dst[Words - WordShift - 1] >> (APINT_BITS_PER_WORD - BitShift);
2600     }
2601   }
2602 
2603   // Fill in the remainder with 0s.
2604   std::memset(Dst, 0, WordShift * APINT_WORD_SIZE);
2605 }
2606 
2607 /// Shift a bignum right Count bits in-place. Shifted in bits are zero. There
2608 /// are no restrictions on Count.
2609 void APInt::tcShiftRight(WordType *Dst, unsigned Words, unsigned Count) {
2610   // Don't bother performing a no-op shift.
2611   if (!Count)
2612     return;
2613 
2614   // WordShift is the inter-part shift; BitShift is is intra-part shift.
2615   unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words);
2616   unsigned BitShift = Count % APINT_BITS_PER_WORD;
2617 
2618   unsigned WordsToMove = Words - WordShift;
2619   // Fastpath for moving by whole words.
2620   if (BitShift == 0) {
2621     std::memmove(Dst, Dst + WordShift, WordsToMove * APINT_WORD_SIZE);
2622   } else {
2623     for (unsigned i = 0; i != WordsToMove; ++i) {
2624       Dst[i] = Dst[i + WordShift] >> BitShift;
2625       if (i + 1 != WordsToMove)
2626         Dst[i] |= Dst[i + WordShift + 1] << (APINT_BITS_PER_WORD - BitShift);
2627     }
2628   }
2629 
2630   // Fill in the remainder with 0s.
2631   std::memset(Dst + WordsToMove, 0, WordShift * APINT_WORD_SIZE);
2632 }
2633 
2634 /* Bitwise and of two bignums.  */
2635 void APInt::tcAnd(WordType *dst, const WordType *rhs, unsigned parts) {
2636   for (unsigned i = 0; i < parts; i++)
2637     dst[i] &= rhs[i];
2638 }
2639 
2640 /* Bitwise inclusive or of two bignums.  */
2641 void APInt::tcOr(WordType *dst, const WordType *rhs, unsigned parts) {
2642   for (unsigned i = 0; i < parts; i++)
2643     dst[i] |= rhs[i];
2644 }
2645 
2646 /* Bitwise exclusive or of two bignums.  */
2647 void APInt::tcXor(WordType *dst, const WordType *rhs, unsigned parts) {
2648   for (unsigned i = 0; i < parts; i++)
2649     dst[i] ^= rhs[i];
2650 }
2651 
2652 /* Complement a bignum in-place.  */
2653 void APInt::tcComplement(WordType *dst, unsigned parts) {
2654   for (unsigned i = 0; i < parts; i++)
2655     dst[i] = ~dst[i];
2656 }
2657 
2658 /* Comparison (unsigned) of two bignums.  */
2659 int APInt::tcCompare(const WordType *lhs, const WordType *rhs,
2660                      unsigned parts) {
2661   while (parts) {
2662     parts--;
2663     if (lhs[parts] != rhs[parts])
2664       return (lhs[parts] > rhs[parts]) ? 1 : -1;
2665   }
2666 
2667   return 0;
2668 }
2669 
2670 /* Set the least significant BITS bits of a bignum, clear the
2671    rest.  */
2672 void APInt::tcSetLeastSignificantBits(WordType *dst, unsigned parts,
2673                                       unsigned bits) {
2674   unsigned i = 0;
2675   while (bits > APINT_BITS_PER_WORD) {
2676     dst[i++] = ~(WordType) 0;
2677     bits -= APINT_BITS_PER_WORD;
2678   }
2679 
2680   if (bits)
2681     dst[i++] = ~(WordType) 0 >> (APINT_BITS_PER_WORD - bits);
2682 
2683   while (i < parts)
2684     dst[i++] = 0;
2685 }
2686