1 //===-- APInt.cpp - Implement APInt class ---------------------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements a class to represent arbitrary precision integer
11 // constant values and provide a variety of arithmetic operations on them.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "llvm/ADT/APInt.h"
16 #include "llvm/ADT/ArrayRef.h"
17 #include "llvm/ADT/FoldingSet.h"
18 #include "llvm/ADT/Hashing.h"
19 #include "llvm/ADT/SmallString.h"
20 #include "llvm/ADT/StringRef.h"
21 #include "llvm/Support/Debug.h"
22 #include "llvm/Support/ErrorHandling.h"
23 #include "llvm/Support/MathExtras.h"
24 #include "llvm/Support/raw_ostream.h"
25 #include <climits>
26 #include <cmath>
27 #include <cstdlib>
28 #include <cstring>
29 using namespace llvm;
30 
31 #define DEBUG_TYPE "apint"
32 
33 /// A utility function for allocating memory, checking for allocation failures,
34 /// and ensuring the contents are zeroed.
35 inline static uint64_t* getClearedMemory(unsigned numWords) {
36   uint64_t * result = new uint64_t[numWords];
37   assert(result && "APInt memory allocation fails!");
38   memset(result, 0, numWords * sizeof(uint64_t));
39   return result;
40 }
41 
42 /// A utility function for allocating memory and checking for allocation
43 /// failure.  The content is not zeroed.
44 inline static uint64_t* getMemory(unsigned numWords) {
45   uint64_t * result = new uint64_t[numWords];
46   assert(result && "APInt memory allocation fails!");
47   return result;
48 }
49 
50 /// A utility function that converts a character to a digit.
51 inline static unsigned getDigit(char cdigit, uint8_t radix) {
52   unsigned r;
53 
54   if (radix == 16 || radix == 36) {
55     r = cdigit - '0';
56     if (r <= 9)
57       return r;
58 
59     r = cdigit - 'A';
60     if (r <= radix - 11U)
61       return r + 10;
62 
63     r = cdigit - 'a';
64     if (r <= radix - 11U)
65       return r + 10;
66 
67     radix = 10;
68   }
69 
70   r = cdigit - '0';
71   if (r < radix)
72     return r;
73 
74   return -1U;
75 }
76 
77 
78 void APInt::initSlowCase(uint64_t val, bool isSigned) {
79   VAL = 0;
80   pVal = getClearedMemory(getNumWords());
81   pVal[0] = val;
82   if (isSigned && int64_t(val) < 0)
83     for (unsigned i = 1; i < getNumWords(); ++i)
84       pVal[i] = -1ULL;
85   clearUnusedBits();
86 }
87 
88 void APInt::initSlowCase(const APInt& that) {
89   VAL = 0;
90   pVal = getMemory(getNumWords());
91   memcpy(pVal, that.pVal, getNumWords() * APINT_WORD_SIZE);
92 }
93 
94 void APInt::initFromArray(ArrayRef<uint64_t> bigVal) {
95   assert(BitWidth && "Bitwidth too small");
96   assert(bigVal.data() && "Null pointer detected!");
97   if (isSingleWord())
98     VAL = bigVal[0];
99   else {
100     // Get memory, cleared to 0
101     VAL = 0;
102     pVal = getClearedMemory(getNumWords());
103     // Calculate the number of words to copy
104     unsigned words = std::min<unsigned>(bigVal.size(), getNumWords());
105     // Copy the words from bigVal to pVal
106     memcpy(pVal, bigVal.data(), words * APINT_WORD_SIZE);
107   }
108   // Make sure unused high bits are cleared
109   clearUnusedBits();
110 }
111 
112 APInt::APInt(unsigned numBits, ArrayRef<uint64_t> bigVal)
113   : BitWidth(numBits) {
114   initFromArray(bigVal);
115 }
116 
117 APInt::APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[])
118   : BitWidth(numBits) {
119   initFromArray(makeArrayRef(bigVal, numWords));
120 }
121 
122 APInt::APInt(unsigned numbits, StringRef Str, uint8_t radix)
123   : VAL(0), BitWidth(numbits) {
124   assert(BitWidth && "Bitwidth too small");
125   fromString(numbits, Str, radix);
126 }
127 
128 APInt& APInt::AssignSlowCase(const APInt& RHS) {
129   // Don't do anything for X = X
130   if (this == &RHS)
131     return *this;
132 
133   if (BitWidth == RHS.getBitWidth()) {
134     // assume same bit-width single-word case is already handled
135     assert(!isSingleWord());
136     memcpy(pVal, RHS.pVal, getNumWords() * APINT_WORD_SIZE);
137     return *this;
138   }
139 
140   if (isSingleWord()) {
141     // assume case where both are single words is already handled
142     assert(!RHS.isSingleWord());
143     VAL = 0;
144     pVal = getMemory(RHS.getNumWords());
145     memcpy(pVal, RHS.pVal, RHS.getNumWords() * APINT_WORD_SIZE);
146   } else if (getNumWords() == RHS.getNumWords())
147     memcpy(pVal, RHS.pVal, RHS.getNumWords() * APINT_WORD_SIZE);
148   else if (RHS.isSingleWord()) {
149     delete [] pVal;
150     VAL = RHS.VAL;
151   } else {
152     delete [] pVal;
153     pVal = getMemory(RHS.getNumWords());
154     memcpy(pVal, RHS.pVal, RHS.getNumWords() * APINT_WORD_SIZE);
155   }
156   BitWidth = RHS.BitWidth;
157   return clearUnusedBits();
158 }
159 
160 /// This method 'profiles' an APInt for use with FoldingSet.
161 void APInt::Profile(FoldingSetNodeID& ID) const {
162   ID.AddInteger(BitWidth);
163 
164   if (isSingleWord()) {
165     ID.AddInteger(VAL);
166     return;
167   }
168 
169   unsigned NumWords = getNumWords();
170   for (unsigned i = 0; i < NumWords; ++i)
171     ID.AddInteger(pVal[i]);
172 }
173 
174 /// @brief Prefix increment operator. Increments the APInt by one.
175 APInt& APInt::operator++() {
176   if (isSingleWord())
177     ++VAL;
178   else
179     tcIncrement(pVal, getNumWords());
180   return clearUnusedBits();
181 }
182 
183 /// @brief Prefix decrement operator. Decrements the APInt by one.
184 APInt& APInt::operator--() {
185   if (isSingleWord())
186     --VAL;
187   else
188     tcDecrement(pVal, getNumWords());
189   return clearUnusedBits();
190 }
191 
192 /// Adds the RHS APint to this APInt.
193 /// @returns this, after addition of RHS.
194 /// @brief Addition assignment operator.
195 APInt& APInt::operator+=(const APInt& RHS) {
196   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
197   if (isSingleWord())
198     VAL += RHS.VAL;
199   else
200     tcAdd(pVal, RHS.pVal, 0, getNumWords());
201   return clearUnusedBits();
202 }
203 
204 APInt& APInt::operator+=(uint64_t RHS) {
205   if (isSingleWord())
206     VAL += RHS;
207   else
208     tcAddPart(pVal, RHS, getNumWords());
209   return clearUnusedBits();
210 }
211 
212 /// Subtracts the RHS APInt from this APInt
213 /// @returns this, after subtraction
214 /// @brief Subtraction assignment operator.
215 APInt& APInt::operator-=(const APInt& RHS) {
216   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
217   if (isSingleWord())
218     VAL -= RHS.VAL;
219   else
220     tcSubtract(pVal, RHS.pVal, 0, getNumWords());
221   return clearUnusedBits();
222 }
223 
224 APInt& APInt::operator-=(uint64_t RHS) {
225   if (isSingleWord())
226     VAL -= RHS;
227   else
228     tcSubtractPart(pVal, RHS, getNumWords());
229   return clearUnusedBits();
230 }
231 
232 /// Multiplies an integer array, x, by a uint64_t integer and places the result
233 /// into dest.
234 /// @returns the carry out of the multiplication.
235 /// @brief Multiply a multi-digit APInt by a single digit (64-bit) integer.
236 static uint64_t mul_1(uint64_t dest[], uint64_t x[], unsigned len, uint64_t y) {
237   // Split y into high 32-bit part (hy)  and low 32-bit part (ly)
238   uint64_t ly = y & 0xffffffffULL, hy = y >> 32;
239   uint64_t carry = 0;
240 
241   // For each digit of x.
242   for (unsigned i = 0; i < len; ++i) {
243     // Split x into high and low words
244     uint64_t lx = x[i] & 0xffffffffULL;
245     uint64_t hx = x[i] >> 32;
246     // hasCarry - A flag to indicate if there is a carry to the next digit.
247     // hasCarry == 0, no carry
248     // hasCarry == 1, has carry
249     // hasCarry == 2, no carry and the calculation result == 0.
250     uint8_t hasCarry = 0;
251     dest[i] = carry + lx * ly;
252     // Determine if the add above introduces carry.
253     hasCarry = (dest[i] < carry) ? 1 : 0;
254     carry = hx * ly + (dest[i] >> 32) + (hasCarry ? (1ULL << 32) : 0);
255     // The upper limit of carry can be (2^32 - 1)(2^32 - 1) +
256     // (2^32 - 1) + 2^32 = 2^64.
257     hasCarry = (!carry && hasCarry) ? 1 : (!carry ? 2 : 0);
258 
259     carry += (lx * hy) & 0xffffffffULL;
260     dest[i] = (carry << 32) | (dest[i] & 0xffffffffULL);
261     carry = (((!carry && hasCarry != 2) || hasCarry == 1) ? (1ULL << 32) : 0) +
262             (carry >> 32) + ((lx * hy) >> 32) + hx * hy;
263   }
264   return carry;
265 }
266 
267 /// Multiplies integer array x by integer array y and stores the result into
268 /// the integer array dest. Note that dest's size must be >= xlen + ylen.
269 /// @brief Generalized multiplication of integer arrays.
270 static void mul(uint64_t dest[], uint64_t x[], unsigned xlen, uint64_t y[],
271                 unsigned ylen) {
272   dest[xlen] = mul_1(dest, x, xlen, y[0]);
273   for (unsigned i = 1; i < ylen; ++i) {
274     uint64_t ly = y[i] & 0xffffffffULL, hy = y[i] >> 32;
275     uint64_t carry = 0, lx = 0, hx = 0;
276     for (unsigned j = 0; j < xlen; ++j) {
277       lx = x[j] & 0xffffffffULL;
278       hx = x[j] >> 32;
279       // hasCarry - A flag to indicate if has carry.
280       // hasCarry == 0, no carry
281       // hasCarry == 1, has carry
282       // hasCarry == 2, no carry and the calculation result == 0.
283       uint8_t hasCarry = 0;
284       uint64_t resul = carry + lx * ly;
285       hasCarry = (resul < carry) ? 1 : 0;
286       carry = (hasCarry ? (1ULL << 32) : 0) + hx * ly + (resul >> 32);
287       hasCarry = (!carry && hasCarry) ? 1 : (!carry ? 2 : 0);
288 
289       carry += (lx * hy) & 0xffffffffULL;
290       resul = (carry << 32) | (resul & 0xffffffffULL);
291       dest[i+j] += resul;
292       carry = (((!carry && hasCarry != 2) || hasCarry == 1) ? (1ULL << 32) : 0)+
293               (carry >> 32) + (dest[i+j] < resul ? 1 : 0) +
294               ((lx * hy) >> 32) + hx * hy;
295     }
296     dest[i+xlen] = carry;
297   }
298 }
299 
300 APInt& APInt::operator*=(const APInt& RHS) {
301   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
302   if (isSingleWord()) {
303     VAL *= RHS.VAL;
304     clearUnusedBits();
305     return *this;
306   }
307 
308   // Get some bit facts about LHS and check for zero
309   unsigned lhsBits = getActiveBits();
310   unsigned lhsWords = !lhsBits ? 0 : whichWord(lhsBits - 1) + 1;
311   if (!lhsWords)
312     // 0 * X ===> 0
313     return *this;
314 
315   // Get some bit facts about RHS and check for zero
316   unsigned rhsBits = RHS.getActiveBits();
317   unsigned rhsWords = !rhsBits ? 0 : whichWord(rhsBits - 1) + 1;
318   if (!rhsWords) {
319     // X * 0 ===> 0
320     clearAllBits();
321     return *this;
322   }
323 
324   // Allocate space for the result
325   unsigned destWords = rhsWords + lhsWords;
326   uint64_t *dest = getMemory(destWords);
327 
328   // Perform the long multiply
329   mul(dest, pVal, lhsWords, RHS.pVal, rhsWords);
330 
331   // Copy result back into *this
332   clearAllBits();
333   unsigned wordsToCopy = destWords >= getNumWords() ? getNumWords() : destWords;
334   memcpy(pVal, dest, wordsToCopy * APINT_WORD_SIZE);
335   clearUnusedBits();
336 
337   // delete dest array and return
338   delete[] dest;
339   return *this;
340 }
341 
342 APInt& APInt::AndAssignSlowCase(const APInt& RHS) {
343   tcAnd(pVal, RHS.pVal, getNumWords());
344   return *this;
345 }
346 
347 APInt& APInt::OrAssignSlowCase(const APInt& RHS) {
348   tcOr(pVal, RHS.pVal, getNumWords());
349   return *this;
350 }
351 
352 APInt& APInt::XorAssignSlowCase(const APInt& RHS) {
353   tcXor(pVal, RHS.pVal, getNumWords());
354   return *this;
355 }
356 
357 APInt APInt::operator*(const APInt& RHS) const {
358   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
359   if (isSingleWord())
360     return APInt(BitWidth, VAL * RHS.VAL);
361   APInt Result(*this);
362   Result *= RHS;
363   return Result;
364 }
365 
366 bool APInt::EqualSlowCase(const APInt& RHS) const {
367   return std::equal(pVal, pVal + getNumWords(), RHS.pVal);
368 }
369 
370 bool APInt::EqualSlowCase(uint64_t Val) const {
371   unsigned n = getActiveBits();
372   if (n <= APINT_BITS_PER_WORD)
373     return pVal[0] == Val;
374   else
375     return false;
376 }
377 
378 bool APInt::ult(const APInt& RHS) const {
379   assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
380   if (isSingleWord())
381     return VAL < RHS.VAL;
382 
383   // Get active bit length of both operands
384   unsigned n1 = getActiveBits();
385   unsigned n2 = RHS.getActiveBits();
386 
387   // If magnitude of LHS is less than RHS, return true.
388   if (n1 < n2)
389     return true;
390 
391   // If magnitude of RHS is greater than LHS, return false.
392   if (n2 < n1)
393     return false;
394 
395   // If they both fit in a word, just compare the low order word
396   if (n1 <= APINT_BITS_PER_WORD && n2 <= APINT_BITS_PER_WORD)
397     return pVal[0] < RHS.pVal[0];
398 
399   // Otherwise, compare all words
400   unsigned topWord = whichWord(std::max(n1,n2)-1);
401   for (int i = topWord; i >= 0; --i) {
402     if (pVal[i] > RHS.pVal[i])
403       return false;
404     if (pVal[i] < RHS.pVal[i])
405       return true;
406   }
407   return false;
408 }
409 
410 bool APInt::slt(const APInt& RHS) const {
411   assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
412   if (isSingleWord()) {
413     int64_t lhsSext = SignExtend64(VAL, BitWidth);
414     int64_t rhsSext = SignExtend64(RHS.VAL, BitWidth);
415     return lhsSext < rhsSext;
416   }
417 
418   bool lhsNeg = isNegative();
419   bool rhsNeg = RHS.isNegative();
420 
421   // If the sign bits don't match, then (LHS < RHS) if LHS is negative
422   if (lhsNeg != rhsNeg)
423     return lhsNeg;
424 
425   // Otherwise we can just use an unsigned comparison, because even negative
426   // numbers compare correctly this way if both have the same signed-ness.
427   return ult(RHS);
428 }
429 
430 void APInt::setBit(unsigned bitPosition) {
431   if (isSingleWord())
432     VAL |= maskBit(bitPosition);
433   else
434     pVal[whichWord(bitPosition)] |= maskBit(bitPosition);
435 }
436 
437 void APInt::setBitsSlowCase(unsigned loBit, unsigned hiBit) {
438   unsigned loWord = whichWord(loBit);
439   unsigned hiWord = whichWord(hiBit);
440 
441   // Create an initial mask for the low word with zeros below loBit.
442   uint64_t loMask = UINT64_MAX << whichBit(loBit);
443 
444   // If hiBit is not aligned, we need a high mask.
445   unsigned hiShiftAmt = whichBit(hiBit);
446   if (hiShiftAmt != 0) {
447     // Create a high mask with zeros above hiBit.
448     uint64_t hiMask = UINT64_MAX >> (APINT_BITS_PER_WORD - hiShiftAmt);
449     // If loWord and hiWord are equal, then we combine the masks. Otherwise,
450     // set the bits in hiWord.
451     if (hiWord == loWord)
452       loMask &= hiMask;
453     else
454       pVal[hiWord] |= hiMask;
455   }
456   // Apply the mask to the low word.
457   pVal[loWord] |= loMask;
458 
459   // Fill any words between loWord and hiWord with all ones.
460   for (unsigned word = loWord + 1; word < hiWord; ++word)
461     pVal[word] = UINT64_MAX;
462 }
463 
464 /// Set the given bit to 0 whose position is given as "bitPosition".
465 /// @brief Set a given bit to 0.
466 void APInt::clearBit(unsigned bitPosition) {
467   if (isSingleWord())
468     VAL &= ~maskBit(bitPosition);
469   else
470     pVal[whichWord(bitPosition)] &= ~maskBit(bitPosition);
471 }
472 
473 /// @brief Toggle every bit to its opposite value.
474 void APInt::flipAllBitsSlowCase() {
475   tcComplement(pVal, getNumWords());
476   clearUnusedBits();
477 }
478 
479 /// Toggle a given bit to its opposite value whose position is given
480 /// as "bitPosition".
481 /// @brief Toggles a given bit to its opposite value.
482 void APInt::flipBit(unsigned bitPosition) {
483   assert(bitPosition < BitWidth && "Out of the bit-width range!");
484   if ((*this)[bitPosition]) clearBit(bitPosition);
485   else setBit(bitPosition);
486 }
487 
488 void APInt::insertBits(const APInt &subBits, unsigned bitPosition) {
489   unsigned subBitWidth = subBits.getBitWidth();
490   assert(0 < subBitWidth && (subBitWidth + bitPosition) <= BitWidth &&
491          "Illegal bit insertion");
492 
493   // Insertion is a direct copy.
494   if (subBitWidth == BitWidth) {
495     *this = subBits;
496     return;
497   }
498 
499   // Single word result can be done as a direct bitmask.
500   if (isSingleWord()) {
501     uint64_t mask = UINT64_MAX >> (APINT_BITS_PER_WORD - subBitWidth);
502     VAL &= ~(mask << bitPosition);
503     VAL |= (subBits.VAL << bitPosition);
504     return;
505   }
506 
507   unsigned loBit = whichBit(bitPosition);
508   unsigned loWord = whichWord(bitPosition);
509   unsigned hi1Word = whichWord(bitPosition + subBitWidth - 1);
510 
511   // Insertion within a single word can be done as a direct bitmask.
512   if (loWord == hi1Word) {
513     uint64_t mask = UINT64_MAX >> (APINT_BITS_PER_WORD - subBitWidth);
514     pVal[loWord] &= ~(mask << loBit);
515     pVal[loWord] |= (subBits.VAL << loBit);
516     return;
517   }
518 
519   // Insert on word boundaries.
520   if (loBit == 0) {
521     // Direct copy whole words.
522     unsigned numWholeSubWords = subBitWidth / APINT_BITS_PER_WORD;
523     memcpy(pVal + loWord, subBits.getRawData(),
524            numWholeSubWords * APINT_WORD_SIZE);
525 
526     // Mask+insert remaining bits.
527     unsigned remainingBits = subBitWidth % APINT_BITS_PER_WORD;
528     if (remainingBits != 0) {
529       uint64_t mask = UINT64_MAX >> (APINT_BITS_PER_WORD - remainingBits);
530       pVal[hi1Word] &= ~mask;
531       pVal[hi1Word] |= subBits.getWord(subBitWidth - 1);
532     }
533     return;
534   }
535 
536   // General case - set/clear individual bits in dst based on src.
537   // TODO - there is scope for optimization here, but at the moment this code
538   // path is barely used so prefer readability over performance.
539   for (unsigned i = 0; i != subBitWidth; ++i) {
540     if (subBits[i])
541       setBit(bitPosition + i);
542     else
543       clearBit(bitPosition + i);
544   }
545 }
546 
547 APInt APInt::extractBits(unsigned numBits, unsigned bitPosition) const {
548   assert(numBits > 0 && "Can't extract zero bits");
549   assert(bitPosition < BitWidth && (numBits + bitPosition) <= BitWidth &&
550          "Illegal bit extraction");
551 
552   if (isSingleWord())
553     return APInt(numBits, VAL >> bitPosition);
554 
555   unsigned loBit = whichBit(bitPosition);
556   unsigned loWord = whichWord(bitPosition);
557   unsigned hiWord = whichWord(bitPosition + numBits - 1);
558 
559   // Single word result extracting bits from a single word source.
560   if (loWord == hiWord)
561     return APInt(numBits, pVal[loWord] >> loBit);
562 
563   // Extracting bits that start on a source word boundary can be done
564   // as a fast memory copy.
565   if (loBit == 0)
566     return APInt(numBits, makeArrayRef(pVal + loWord, 1 + hiWord - loWord));
567 
568   // General case - shift + copy source words directly into place.
569   APInt Result(numBits, 0);
570   unsigned NumSrcWords = getNumWords();
571   unsigned NumDstWords = Result.getNumWords();
572 
573   for (unsigned word = 0; word < NumDstWords; ++word) {
574     uint64_t w0 = pVal[loWord + word];
575     uint64_t w1 =
576         (loWord + word + 1) < NumSrcWords ? pVal[loWord + word + 1] : 0;
577     Result.pVal[word] = (w0 >> loBit) | (w1 << (APINT_BITS_PER_WORD - loBit));
578   }
579 
580   return Result.clearUnusedBits();
581 }
582 
583 unsigned APInt::getBitsNeeded(StringRef str, uint8_t radix) {
584   assert(!str.empty() && "Invalid string length");
585   assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 ||
586           radix == 36) &&
587          "Radix should be 2, 8, 10, 16, or 36!");
588 
589   size_t slen = str.size();
590 
591   // Each computation below needs to know if it's negative.
592   StringRef::iterator p = str.begin();
593   unsigned isNegative = *p == '-';
594   if (*p == '-' || *p == '+') {
595     p++;
596     slen--;
597     assert(slen && "String is only a sign, needs a value.");
598   }
599 
600   // For radixes of power-of-two values, the bits required is accurately and
601   // easily computed
602   if (radix == 2)
603     return slen + isNegative;
604   if (radix == 8)
605     return slen * 3 + isNegative;
606   if (radix == 16)
607     return slen * 4 + isNegative;
608 
609   // FIXME: base 36
610 
611   // This is grossly inefficient but accurate. We could probably do something
612   // with a computation of roughly slen*64/20 and then adjust by the value of
613   // the first few digits. But, I'm not sure how accurate that could be.
614 
615   // Compute a sufficient number of bits that is always large enough but might
616   // be too large. This avoids the assertion in the constructor. This
617   // calculation doesn't work appropriately for the numbers 0-9, so just use 4
618   // bits in that case.
619   unsigned sufficient
620     = radix == 10? (slen == 1 ? 4 : slen * 64/18)
621                  : (slen == 1 ? 7 : slen * 16/3);
622 
623   // Convert to the actual binary value.
624   APInt tmp(sufficient, StringRef(p, slen), radix);
625 
626   // Compute how many bits are required. If the log is infinite, assume we need
627   // just bit.
628   unsigned log = tmp.logBase2();
629   if (log == (unsigned)-1) {
630     return isNegative + 1;
631   } else {
632     return isNegative + log + 1;
633   }
634 }
635 
636 hash_code llvm::hash_value(const APInt &Arg) {
637   if (Arg.isSingleWord())
638     return hash_combine(Arg.VAL);
639 
640   return hash_combine_range(Arg.pVal, Arg.pVal + Arg.getNumWords());
641 }
642 
643 bool APInt::isSplat(unsigned SplatSizeInBits) const {
644   assert(getBitWidth() % SplatSizeInBits == 0 &&
645          "SplatSizeInBits must divide width!");
646   // We can check that all parts of an integer are equal by making use of a
647   // little trick: rotate and check if it's still the same value.
648   return *this == rotl(SplatSizeInBits);
649 }
650 
651 /// This function returns the high "numBits" bits of this APInt.
652 APInt APInt::getHiBits(unsigned numBits) const {
653   return this->lshr(BitWidth - numBits);
654 }
655 
656 /// This function returns the low "numBits" bits of this APInt.
657 APInt APInt::getLoBits(unsigned numBits) const {
658   APInt Result(getLowBitsSet(BitWidth, numBits));
659   Result &= *this;
660   return Result;
661 }
662 
663 unsigned APInt::countLeadingZerosSlowCase() const {
664   unsigned Count = 0;
665   for (int i = getNumWords()-1; i >= 0; --i) {
666     uint64_t V = pVal[i];
667     if (V == 0)
668       Count += APINT_BITS_PER_WORD;
669     else {
670       Count += llvm::countLeadingZeros(V);
671       break;
672     }
673   }
674   // Adjust for unused bits in the most significant word (they are zero).
675   unsigned Mod = BitWidth % APINT_BITS_PER_WORD;
676   Count -= Mod > 0 ? APINT_BITS_PER_WORD - Mod : 0;
677   return Count;
678 }
679 
680 unsigned APInt::countLeadingOnes() const {
681   if (isSingleWord())
682     return llvm::countLeadingOnes(VAL << (APINT_BITS_PER_WORD - BitWidth));
683 
684   unsigned highWordBits = BitWidth % APINT_BITS_PER_WORD;
685   unsigned shift;
686   if (!highWordBits) {
687     highWordBits = APINT_BITS_PER_WORD;
688     shift = 0;
689   } else {
690     shift = APINT_BITS_PER_WORD - highWordBits;
691   }
692   int i = getNumWords() - 1;
693   unsigned Count = llvm::countLeadingOnes(pVal[i] << shift);
694   if (Count == highWordBits) {
695     for (i--; i >= 0; --i) {
696       if (pVal[i] == -1ULL)
697         Count += APINT_BITS_PER_WORD;
698       else {
699         Count += llvm::countLeadingOnes(pVal[i]);
700         break;
701       }
702     }
703   }
704   return Count;
705 }
706 
707 unsigned APInt::countTrailingZeros() const {
708   if (isSingleWord())
709     return std::min(unsigned(llvm::countTrailingZeros(VAL)), BitWidth);
710   unsigned Count = 0;
711   unsigned i = 0;
712   for (; i < getNumWords() && pVal[i] == 0; ++i)
713     Count += APINT_BITS_PER_WORD;
714   if (i < getNumWords())
715     Count += llvm::countTrailingZeros(pVal[i]);
716   return std::min(Count, BitWidth);
717 }
718 
719 unsigned APInt::countTrailingOnesSlowCase() const {
720   unsigned Count = 0;
721   unsigned i = 0;
722   for (; i < getNumWords() && pVal[i] == -1ULL; ++i)
723     Count += APINT_BITS_PER_WORD;
724   if (i < getNumWords())
725     Count += llvm::countTrailingOnes(pVal[i]);
726   return std::min(Count, BitWidth);
727 }
728 
729 unsigned APInt::countPopulationSlowCase() const {
730   unsigned Count = 0;
731   for (unsigned i = 0; i < getNumWords(); ++i)
732     Count += llvm::countPopulation(pVal[i]);
733   return Count;
734 }
735 
736 APInt APInt::byteSwap() const {
737   assert(BitWidth >= 16 && BitWidth % 16 == 0 && "Cannot byteswap!");
738   if (BitWidth == 16)
739     return APInt(BitWidth, ByteSwap_16(uint16_t(VAL)));
740   if (BitWidth == 32)
741     return APInt(BitWidth, ByteSwap_32(unsigned(VAL)));
742   if (BitWidth == 48) {
743     unsigned Tmp1 = unsigned(VAL >> 16);
744     Tmp1 = ByteSwap_32(Tmp1);
745     uint16_t Tmp2 = uint16_t(VAL);
746     Tmp2 = ByteSwap_16(Tmp2);
747     return APInt(BitWidth, (uint64_t(Tmp2) << 32) | Tmp1);
748   }
749   if (BitWidth == 64)
750     return APInt(BitWidth, ByteSwap_64(VAL));
751 
752   APInt Result(getNumWords() * APINT_BITS_PER_WORD, 0);
753   for (unsigned I = 0, N = getNumWords(); I != N; ++I)
754     Result.pVal[I] = ByteSwap_64(pVal[N - I - 1]);
755   if (Result.BitWidth != BitWidth) {
756     Result.lshrInPlace(Result.BitWidth - BitWidth);
757     Result.BitWidth = BitWidth;
758   }
759   return Result;
760 }
761 
762 APInt APInt::reverseBits() const {
763   switch (BitWidth) {
764   case 64:
765     return APInt(BitWidth, llvm::reverseBits<uint64_t>(VAL));
766   case 32:
767     return APInt(BitWidth, llvm::reverseBits<uint32_t>(VAL));
768   case 16:
769     return APInt(BitWidth, llvm::reverseBits<uint16_t>(VAL));
770   case 8:
771     return APInt(BitWidth, llvm::reverseBits<uint8_t>(VAL));
772   default:
773     break;
774   }
775 
776   APInt Val(*this);
777   APInt Reversed(BitWidth, 0);
778   unsigned S = BitWidth;
779 
780   for (; Val != 0; Val.lshrInPlace(1)) {
781     Reversed <<= 1;
782     Reversed |= Val[0];
783     --S;
784   }
785 
786   Reversed <<= S;
787   return Reversed;
788 }
789 
790 APInt llvm::APIntOps::GreatestCommonDivisor(APInt A, APInt B) {
791   // Fast-path a common case.
792   if (A == B) return A;
793 
794   // Corner cases: if either operand is zero, the other is the gcd.
795   if (!A) return B;
796   if (!B) return A;
797 
798   // Count common powers of 2 and remove all other powers of 2.
799   unsigned Pow2;
800   {
801     unsigned Pow2_A = A.countTrailingZeros();
802     unsigned Pow2_B = B.countTrailingZeros();
803     if (Pow2_A > Pow2_B) {
804       A.lshrInPlace(Pow2_A - Pow2_B);
805       Pow2 = Pow2_B;
806     } else if (Pow2_B > Pow2_A) {
807       B.lshrInPlace(Pow2_B - Pow2_A);
808       Pow2 = Pow2_A;
809     } else {
810       Pow2 = Pow2_A;
811     }
812   }
813 
814   // Both operands are odd multiples of 2^Pow_2:
815   //
816   //   gcd(a, b) = gcd(|a - b| / 2^i, min(a, b))
817   //
818   // This is a modified version of Stein's algorithm, taking advantage of
819   // efficient countTrailingZeros().
820   while (A != B) {
821     if (A.ugt(B)) {
822       A -= B;
823       A.lshrInPlace(A.countTrailingZeros() - Pow2);
824     } else {
825       B -= A;
826       B.lshrInPlace(B.countTrailingZeros() - Pow2);
827     }
828   }
829 
830   return A;
831 }
832 
833 APInt llvm::APIntOps::RoundDoubleToAPInt(double Double, unsigned width) {
834   union {
835     double D;
836     uint64_t I;
837   } T;
838   T.D = Double;
839 
840   // Get the sign bit from the highest order bit
841   bool isNeg = T.I >> 63;
842 
843   // Get the 11-bit exponent and adjust for the 1023 bit bias
844   int64_t exp = ((T.I >> 52) & 0x7ff) - 1023;
845 
846   // If the exponent is negative, the value is < 0 so just return 0.
847   if (exp < 0)
848     return APInt(width, 0u);
849 
850   // Extract the mantissa by clearing the top 12 bits (sign + exponent).
851   uint64_t mantissa = (T.I & (~0ULL >> 12)) | 1ULL << 52;
852 
853   // If the exponent doesn't shift all bits out of the mantissa
854   if (exp < 52)
855     return isNeg ? -APInt(width, mantissa >> (52 - exp)) :
856                     APInt(width, mantissa >> (52 - exp));
857 
858   // If the client didn't provide enough bits for us to shift the mantissa into
859   // then the result is undefined, just return 0
860   if (width <= exp - 52)
861     return APInt(width, 0);
862 
863   // Otherwise, we have to shift the mantissa bits up to the right location
864   APInt Tmp(width, mantissa);
865   Tmp = Tmp.shl((unsigned)exp - 52);
866   return isNeg ? -Tmp : Tmp;
867 }
868 
869 /// This function converts this APInt to a double.
870 /// The layout for double is as following (IEEE Standard 754):
871 ///  --------------------------------------
872 /// |  Sign    Exponent    Fraction    Bias |
873 /// |-------------------------------------- |
874 /// |  1[63]   11[62-52]   52[51-00]   1023 |
875 ///  --------------------------------------
876 double APInt::roundToDouble(bool isSigned) const {
877 
878   // Handle the simple case where the value is contained in one uint64_t.
879   // It is wrong to optimize getWord(0) to VAL; there might be more than one word.
880   if (isSingleWord() || getActiveBits() <= APINT_BITS_PER_WORD) {
881     if (isSigned) {
882       int64_t sext = SignExtend64(getWord(0), BitWidth);
883       return double(sext);
884     } else
885       return double(getWord(0));
886   }
887 
888   // Determine if the value is negative.
889   bool isNeg = isSigned ? (*this)[BitWidth-1] : false;
890 
891   // Construct the absolute value if we're negative.
892   APInt Tmp(isNeg ? -(*this) : (*this));
893 
894   // Figure out how many bits we're using.
895   unsigned n = Tmp.getActiveBits();
896 
897   // The exponent (without bias normalization) is just the number of bits
898   // we are using. Note that the sign bit is gone since we constructed the
899   // absolute value.
900   uint64_t exp = n;
901 
902   // Return infinity for exponent overflow
903   if (exp > 1023) {
904     if (!isSigned || !isNeg)
905       return std::numeric_limits<double>::infinity();
906     else
907       return -std::numeric_limits<double>::infinity();
908   }
909   exp += 1023; // Increment for 1023 bias
910 
911   // Number of bits in mantissa is 52. To obtain the mantissa value, we must
912   // extract the high 52 bits from the correct words in pVal.
913   uint64_t mantissa;
914   unsigned hiWord = whichWord(n-1);
915   if (hiWord == 0) {
916     mantissa = Tmp.pVal[0];
917     if (n > 52)
918       mantissa >>= n - 52; // shift down, we want the top 52 bits.
919   } else {
920     assert(hiWord > 0 && "huh?");
921     uint64_t hibits = Tmp.pVal[hiWord] << (52 - n % APINT_BITS_PER_WORD);
922     uint64_t lobits = Tmp.pVal[hiWord-1] >> (11 + n % APINT_BITS_PER_WORD);
923     mantissa = hibits | lobits;
924   }
925 
926   // The leading bit of mantissa is implicit, so get rid of it.
927   uint64_t sign = isNeg ? (1ULL << (APINT_BITS_PER_WORD - 1)) : 0;
928   union {
929     double D;
930     uint64_t I;
931   } T;
932   T.I = sign | (exp << 52) | mantissa;
933   return T.D;
934 }
935 
936 // Truncate to new width.
937 APInt APInt::trunc(unsigned width) const {
938   assert(width < BitWidth && "Invalid APInt Truncate request");
939   assert(width && "Can't truncate to 0 bits");
940 
941   if (width <= APINT_BITS_PER_WORD)
942     return APInt(width, getRawData()[0]);
943 
944   APInt Result(getMemory(getNumWords(width)), width);
945 
946   // Copy full words.
947   unsigned i;
948   for (i = 0; i != width / APINT_BITS_PER_WORD; i++)
949     Result.pVal[i] = pVal[i];
950 
951   // Truncate and copy any partial word.
952   unsigned bits = (0 - width) % APINT_BITS_PER_WORD;
953   if (bits != 0)
954     Result.pVal[i] = pVal[i] << bits >> bits;
955 
956   return Result;
957 }
958 
959 // Sign extend to a new width.
960 APInt APInt::sext(unsigned width) const {
961   assert(width > BitWidth && "Invalid APInt SignExtend request");
962 
963   if (width <= APINT_BITS_PER_WORD) {
964     uint64_t val = VAL << (APINT_BITS_PER_WORD - BitWidth);
965     val = (int64_t)val >> (width - BitWidth);
966     return APInt(width, val >> (APINT_BITS_PER_WORD - width));
967   }
968 
969   APInt Result(getMemory(getNumWords(width)), width);
970 
971   // Copy full words.
972   unsigned i;
973   uint64_t word = 0;
974   for (i = 0; i != BitWidth / APINT_BITS_PER_WORD; i++) {
975     word = getRawData()[i];
976     Result.pVal[i] = word;
977   }
978 
979   // Read and sign-extend any partial word.
980   unsigned bits = (0 - BitWidth) % APINT_BITS_PER_WORD;
981   if (bits != 0)
982     word = (int64_t)getRawData()[i] << bits >> bits;
983   else
984     word = (int64_t)word >> (APINT_BITS_PER_WORD - 1);
985 
986   // Write remaining full words.
987   for (; i != width / APINT_BITS_PER_WORD; i++) {
988     Result.pVal[i] = word;
989     word = (int64_t)word >> (APINT_BITS_PER_WORD - 1);
990   }
991 
992   // Write any partial word.
993   bits = (0 - width) % APINT_BITS_PER_WORD;
994   if (bits != 0)
995     Result.pVal[i] = word << bits >> bits;
996 
997   return Result;
998 }
999 
1000 //  Zero extend to a new width.
1001 APInt APInt::zext(unsigned width) const {
1002   assert(width > BitWidth && "Invalid APInt ZeroExtend request");
1003 
1004   if (width <= APINT_BITS_PER_WORD)
1005     return APInt(width, VAL);
1006 
1007   APInt Result(getMemory(getNumWords(width)), width);
1008 
1009   // Copy words.
1010   unsigned i;
1011   for (i = 0; i != getNumWords(); i++)
1012     Result.pVal[i] = getRawData()[i];
1013 
1014   // Zero remaining words.
1015   memset(&Result.pVal[i], 0, (Result.getNumWords() - i) * APINT_WORD_SIZE);
1016 
1017   return Result;
1018 }
1019 
1020 APInt APInt::zextOrTrunc(unsigned width) const {
1021   if (BitWidth < width)
1022     return zext(width);
1023   if (BitWidth > width)
1024     return trunc(width);
1025   return *this;
1026 }
1027 
1028 APInt APInt::sextOrTrunc(unsigned width) const {
1029   if (BitWidth < width)
1030     return sext(width);
1031   if (BitWidth > width)
1032     return trunc(width);
1033   return *this;
1034 }
1035 
1036 APInt APInt::zextOrSelf(unsigned width) const {
1037   if (BitWidth < width)
1038     return zext(width);
1039   return *this;
1040 }
1041 
1042 APInt APInt::sextOrSelf(unsigned width) const {
1043   if (BitWidth < width)
1044     return sext(width);
1045   return *this;
1046 }
1047 
1048 /// Arithmetic right-shift this APInt by shiftAmt.
1049 /// @brief Arithmetic right-shift function.
1050 APInt APInt::ashr(const APInt &shiftAmt) const {
1051   return ashr((unsigned)shiftAmt.getLimitedValue(BitWidth));
1052 }
1053 
1054 /// Arithmetic right-shift this APInt by shiftAmt.
1055 /// @brief Arithmetic right-shift function.
1056 APInt APInt::ashr(unsigned shiftAmt) const {
1057   assert(shiftAmt <= BitWidth && "Invalid shift amount");
1058   // Handle a degenerate case
1059   if (shiftAmt == 0)
1060     return *this;
1061 
1062   // Handle single word shifts with built-in ashr
1063   if (isSingleWord()) {
1064     if (shiftAmt == BitWidth)
1065       return APInt(BitWidth, 0); // undefined
1066     return APInt(BitWidth, SignExtend64(VAL, BitWidth) >> shiftAmt);
1067   }
1068 
1069   // If all the bits were shifted out, the result is, technically, undefined.
1070   // We return -1 if it was negative, 0 otherwise. We check this early to avoid
1071   // issues in the algorithm below.
1072   if (shiftAmt == BitWidth) {
1073     if (isNegative())
1074       return APInt(BitWidth, -1ULL, true);
1075     else
1076       return APInt(BitWidth, 0);
1077   }
1078 
1079   // Create some space for the result.
1080   uint64_t * val = new uint64_t[getNumWords()];
1081 
1082   // Compute some values needed by the following shift algorithms
1083   unsigned wordShift = shiftAmt % APINT_BITS_PER_WORD; // bits to shift per word
1084   unsigned offset = shiftAmt / APINT_BITS_PER_WORD; // word offset for shift
1085   unsigned breakWord = getNumWords() - 1 - offset; // last word affected
1086   unsigned bitsInWord = whichBit(BitWidth); // how many bits in last word?
1087   if (bitsInWord == 0)
1088     bitsInWord = APINT_BITS_PER_WORD;
1089 
1090   // If we are shifting whole words, just move whole words
1091   if (wordShift == 0) {
1092     // Move the words containing significant bits
1093     for (unsigned i = 0; i <= breakWord; ++i)
1094       val[i] = pVal[i+offset]; // move whole word
1095 
1096     // Adjust the top significant word for sign bit fill, if negative
1097     if (isNegative())
1098       if (bitsInWord < APINT_BITS_PER_WORD)
1099         val[breakWord] |= ~0ULL << bitsInWord; // set high bits
1100   } else {
1101     // Shift the low order words
1102     for (unsigned i = 0; i < breakWord; ++i) {
1103       // This combines the shifted corresponding word with the low bits from
1104       // the next word (shifted into this word's high bits).
1105       val[i] = (pVal[i+offset] >> wordShift) |
1106                (pVal[i+offset+1] << (APINT_BITS_PER_WORD - wordShift));
1107     }
1108 
1109     // Shift the break word. In this case there are no bits from the next word
1110     // to include in this word.
1111     val[breakWord] = pVal[breakWord+offset] >> wordShift;
1112 
1113     // Deal with sign extension in the break word, and possibly the word before
1114     // it.
1115     if (isNegative()) {
1116       if (wordShift > bitsInWord) {
1117         if (breakWord > 0)
1118           val[breakWord-1] |=
1119             ~0ULL << (APINT_BITS_PER_WORD - (wordShift - bitsInWord));
1120         val[breakWord] |= ~0ULL;
1121       } else
1122         val[breakWord] |= (~0ULL << (bitsInWord - wordShift));
1123     }
1124   }
1125 
1126   // Remaining words are 0 or -1, just assign them.
1127   uint64_t fillValue = (isNegative() ? -1ULL : 0);
1128   for (unsigned i = breakWord+1; i < getNumWords(); ++i)
1129     val[i] = fillValue;
1130   APInt Result(val, BitWidth);
1131   Result.clearUnusedBits();
1132   return Result;
1133 }
1134 
1135 /// Logical right-shift this APInt by shiftAmt.
1136 /// @brief Logical right-shift function.
1137 void APInt::lshrInPlace(const APInt &shiftAmt) {
1138   lshrInPlace((unsigned)shiftAmt.getLimitedValue(BitWidth));
1139 }
1140 
1141 /// Logical right-shift this APInt by shiftAmt.
1142 /// @brief Logical right-shift function.
1143 void APInt::lshrSlowCase(unsigned ShiftAmt) {
1144   tcShiftRight(pVal, getNumWords(), ShiftAmt);
1145 }
1146 
1147 /// Left-shift this APInt by shiftAmt.
1148 /// @brief Left-shift function.
1149 APInt APInt::shl(const APInt &shiftAmt) const {
1150   // It's undefined behavior in C to shift by BitWidth or greater.
1151   return shl((unsigned)shiftAmt.getLimitedValue(BitWidth));
1152 }
1153 
1154 void APInt::shlSlowCase(unsigned ShiftAmt) {
1155   tcShiftLeft(pVal, getNumWords(), ShiftAmt);
1156   clearUnusedBits();
1157 }
1158 
1159 // Calculate the rotate amount modulo the bit width.
1160 static unsigned rotateModulo(unsigned BitWidth, const APInt &rotateAmt) {
1161   unsigned rotBitWidth = rotateAmt.getBitWidth();
1162   APInt rot = rotateAmt;
1163   if (rotBitWidth < BitWidth) {
1164     // Extend the rotate APInt, so that the urem doesn't divide by 0.
1165     // e.g. APInt(1, 32) would give APInt(1, 0).
1166     rot = rotateAmt.zext(BitWidth);
1167   }
1168   rot = rot.urem(APInt(rot.getBitWidth(), BitWidth));
1169   return rot.getLimitedValue(BitWidth);
1170 }
1171 
1172 APInt APInt::rotl(const APInt &rotateAmt) const {
1173   return rotl(rotateModulo(BitWidth, rotateAmt));
1174 }
1175 
1176 APInt APInt::rotl(unsigned rotateAmt) const {
1177   rotateAmt %= BitWidth;
1178   if (rotateAmt == 0)
1179     return *this;
1180   return shl(rotateAmt) | lshr(BitWidth - rotateAmt);
1181 }
1182 
1183 APInt APInt::rotr(const APInt &rotateAmt) const {
1184   return rotr(rotateModulo(BitWidth, rotateAmt));
1185 }
1186 
1187 APInt APInt::rotr(unsigned rotateAmt) const {
1188   rotateAmt %= BitWidth;
1189   if (rotateAmt == 0)
1190     return *this;
1191   return lshr(rotateAmt) | shl(BitWidth - rotateAmt);
1192 }
1193 
1194 // Square Root - this method computes and returns the square root of "this".
1195 // Three mechanisms are used for computation. For small values (<= 5 bits),
1196 // a table lookup is done. This gets some performance for common cases. For
1197 // values using less than 52 bits, the value is converted to double and then
1198 // the libc sqrt function is called. The result is rounded and then converted
1199 // back to a uint64_t which is then used to construct the result. Finally,
1200 // the Babylonian method for computing square roots is used.
1201 APInt APInt::sqrt() const {
1202 
1203   // Determine the magnitude of the value.
1204   unsigned magnitude = getActiveBits();
1205 
1206   // Use a fast table for some small values. This also gets rid of some
1207   // rounding errors in libc sqrt for small values.
1208   if (magnitude <= 5) {
1209     static const uint8_t results[32] = {
1210       /*     0 */ 0,
1211       /*  1- 2 */ 1, 1,
1212       /*  3- 6 */ 2, 2, 2, 2,
1213       /*  7-12 */ 3, 3, 3, 3, 3, 3,
1214       /* 13-20 */ 4, 4, 4, 4, 4, 4, 4, 4,
1215       /* 21-30 */ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1216       /*    31 */ 6
1217     };
1218     return APInt(BitWidth, results[ (isSingleWord() ? VAL : pVal[0]) ]);
1219   }
1220 
1221   // If the magnitude of the value fits in less than 52 bits (the precision of
1222   // an IEEE double precision floating point value), then we can use the
1223   // libc sqrt function which will probably use a hardware sqrt computation.
1224   // This should be faster than the algorithm below.
1225   if (magnitude < 52) {
1226     return APInt(BitWidth,
1227                  uint64_t(::round(::sqrt(double(isSingleWord()?VAL:pVal[0])))));
1228   }
1229 
1230   // Okay, all the short cuts are exhausted. We must compute it. The following
1231   // is a classical Babylonian method for computing the square root. This code
1232   // was adapted to APInt from a wikipedia article on such computations.
1233   // See http://www.wikipedia.org/ and go to the page named
1234   // Calculate_an_integer_square_root.
1235   unsigned nbits = BitWidth, i = 4;
1236   APInt testy(BitWidth, 16);
1237   APInt x_old(BitWidth, 1);
1238   APInt x_new(BitWidth, 0);
1239   APInt two(BitWidth, 2);
1240 
1241   // Select a good starting value using binary logarithms.
1242   for (;; i += 2, testy = testy.shl(2))
1243     if (i >= nbits || this->ule(testy)) {
1244       x_old = x_old.shl(i / 2);
1245       break;
1246     }
1247 
1248   // Use the Babylonian method to arrive at the integer square root:
1249   for (;;) {
1250     x_new = (this->udiv(x_old) + x_old).udiv(two);
1251     if (x_old.ule(x_new))
1252       break;
1253     x_old = x_new;
1254   }
1255 
1256   // Make sure we return the closest approximation
1257   // NOTE: The rounding calculation below is correct. It will produce an
1258   // off-by-one discrepancy with results from pari/gp. That discrepancy has been
1259   // determined to be a rounding issue with pari/gp as it begins to use a
1260   // floating point representation after 192 bits. There are no discrepancies
1261   // between this algorithm and pari/gp for bit widths < 192 bits.
1262   APInt square(x_old * x_old);
1263   APInt nextSquare((x_old + 1) * (x_old +1));
1264   if (this->ult(square))
1265     return x_old;
1266   assert(this->ule(nextSquare) && "Error in APInt::sqrt computation");
1267   APInt midpoint((nextSquare - square).udiv(two));
1268   APInt offset(*this - square);
1269   if (offset.ult(midpoint))
1270     return x_old;
1271   return x_old + 1;
1272 }
1273 
1274 /// Computes the multiplicative inverse of this APInt for a given modulo. The
1275 /// iterative extended Euclidean algorithm is used to solve for this value,
1276 /// however we simplify it to speed up calculating only the inverse, and take
1277 /// advantage of div+rem calculations. We also use some tricks to avoid copying
1278 /// (potentially large) APInts around.
1279 APInt APInt::multiplicativeInverse(const APInt& modulo) const {
1280   assert(ult(modulo) && "This APInt must be smaller than the modulo");
1281 
1282   // Using the properties listed at the following web page (accessed 06/21/08):
1283   //   http://www.numbertheory.org/php/euclid.html
1284   // (especially the properties numbered 3, 4 and 9) it can be proved that
1285   // BitWidth bits suffice for all the computations in the algorithm implemented
1286   // below. More precisely, this number of bits suffice if the multiplicative
1287   // inverse exists, but may not suffice for the general extended Euclidean
1288   // algorithm.
1289 
1290   APInt r[2] = { modulo, *this };
1291   APInt t[2] = { APInt(BitWidth, 0), APInt(BitWidth, 1) };
1292   APInt q(BitWidth, 0);
1293 
1294   unsigned i;
1295   for (i = 0; r[i^1] != 0; i ^= 1) {
1296     // An overview of the math without the confusing bit-flipping:
1297     // q = r[i-2] / r[i-1]
1298     // r[i] = r[i-2] % r[i-1]
1299     // t[i] = t[i-2] - t[i-1] * q
1300     udivrem(r[i], r[i^1], q, r[i]);
1301     t[i] -= t[i^1] * q;
1302   }
1303 
1304   // If this APInt and the modulo are not coprime, there is no multiplicative
1305   // inverse, so return 0. We check this by looking at the next-to-last
1306   // remainder, which is the gcd(*this,modulo) as calculated by the Euclidean
1307   // algorithm.
1308   if (r[i] != 1)
1309     return APInt(BitWidth, 0);
1310 
1311   // The next-to-last t is the multiplicative inverse.  However, we are
1312   // interested in a positive inverse. Calcuate a positive one from a negative
1313   // one if necessary. A simple addition of the modulo suffices because
1314   // abs(t[i]) is known to be less than *this/2 (see the link above).
1315   return t[i].isNegative() ? t[i] + modulo : t[i];
1316 }
1317 
1318 /// Calculate the magic numbers required to implement a signed integer division
1319 /// by a constant as a sequence of multiplies, adds and shifts.  Requires that
1320 /// the divisor not be 0, 1, or -1.  Taken from "Hacker's Delight", Henry S.
1321 /// Warren, Jr., chapter 10.
1322 APInt::ms APInt::magic() const {
1323   const APInt& d = *this;
1324   unsigned p;
1325   APInt ad, anc, delta, q1, r1, q2, r2, t;
1326   APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
1327   struct ms mag;
1328 
1329   ad = d.abs();
1330   t = signedMin + (d.lshr(d.getBitWidth() - 1));
1331   anc = t - 1 - t.urem(ad);   // absolute value of nc
1332   p = d.getBitWidth() - 1;    // initialize p
1333   q1 = signedMin.udiv(anc);   // initialize q1 = 2p/abs(nc)
1334   r1 = signedMin - q1*anc;    // initialize r1 = rem(2p,abs(nc))
1335   q2 = signedMin.udiv(ad);    // initialize q2 = 2p/abs(d)
1336   r2 = signedMin - q2*ad;     // initialize r2 = rem(2p,abs(d))
1337   do {
1338     p = p + 1;
1339     q1 = q1<<1;          // update q1 = 2p/abs(nc)
1340     r1 = r1<<1;          // update r1 = rem(2p/abs(nc))
1341     if (r1.uge(anc)) {  // must be unsigned comparison
1342       q1 = q1 + 1;
1343       r1 = r1 - anc;
1344     }
1345     q2 = q2<<1;          // update q2 = 2p/abs(d)
1346     r2 = r2<<1;          // update r2 = rem(2p/abs(d))
1347     if (r2.uge(ad)) {   // must be unsigned comparison
1348       q2 = q2 + 1;
1349       r2 = r2 - ad;
1350     }
1351     delta = ad - r2;
1352   } while (q1.ult(delta) || (q1 == delta && r1 == 0));
1353 
1354   mag.m = q2 + 1;
1355   if (d.isNegative()) mag.m = -mag.m;   // resulting magic number
1356   mag.s = p - d.getBitWidth();          // resulting shift
1357   return mag;
1358 }
1359 
1360 /// Calculate the magic numbers required to implement an unsigned integer
1361 /// division by a constant as a sequence of multiplies, adds and shifts.
1362 /// Requires that the divisor not be 0.  Taken from "Hacker's Delight", Henry
1363 /// S. Warren, Jr., chapter 10.
1364 /// LeadingZeros can be used to simplify the calculation if the upper bits
1365 /// of the divided value are known zero.
1366 APInt::mu APInt::magicu(unsigned LeadingZeros) const {
1367   const APInt& d = *this;
1368   unsigned p;
1369   APInt nc, delta, q1, r1, q2, r2;
1370   struct mu magu;
1371   magu.a = 0;               // initialize "add" indicator
1372   APInt allOnes = APInt::getAllOnesValue(d.getBitWidth()).lshr(LeadingZeros);
1373   APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
1374   APInt signedMax = APInt::getSignedMaxValue(d.getBitWidth());
1375 
1376   nc = allOnes - (allOnes - d).urem(d);
1377   p = d.getBitWidth() - 1;  // initialize p
1378   q1 = signedMin.udiv(nc);  // initialize q1 = 2p/nc
1379   r1 = signedMin - q1*nc;   // initialize r1 = rem(2p,nc)
1380   q2 = signedMax.udiv(d);   // initialize q2 = (2p-1)/d
1381   r2 = signedMax - q2*d;    // initialize r2 = rem((2p-1),d)
1382   do {
1383     p = p + 1;
1384     if (r1.uge(nc - r1)) {
1385       q1 = q1 + q1 + 1;  // update q1
1386       r1 = r1 + r1 - nc; // update r1
1387     }
1388     else {
1389       q1 = q1+q1; // update q1
1390       r1 = r1+r1; // update r1
1391     }
1392     if ((r2 + 1).uge(d - r2)) {
1393       if (q2.uge(signedMax)) magu.a = 1;
1394       q2 = q2+q2 + 1;     // update q2
1395       r2 = r2+r2 + 1 - d; // update r2
1396     }
1397     else {
1398       if (q2.uge(signedMin)) magu.a = 1;
1399       q2 = q2+q2;     // update q2
1400       r2 = r2+r2 + 1; // update r2
1401     }
1402     delta = d - 1 - r2;
1403   } while (p < d.getBitWidth()*2 &&
1404            (q1.ult(delta) || (q1 == delta && r1 == 0)));
1405   magu.m = q2 + 1; // resulting magic number
1406   magu.s = p - d.getBitWidth();  // resulting shift
1407   return magu;
1408 }
1409 
1410 /// Implementation of Knuth's Algorithm D (Division of nonnegative integers)
1411 /// from "Art of Computer Programming, Volume 2", section 4.3.1, p. 272. The
1412 /// variables here have the same names as in the algorithm. Comments explain
1413 /// the algorithm and any deviation from it.
1414 static void KnuthDiv(unsigned *u, unsigned *v, unsigned *q, unsigned* r,
1415                      unsigned m, unsigned n) {
1416   assert(u && "Must provide dividend");
1417   assert(v && "Must provide divisor");
1418   assert(q && "Must provide quotient");
1419   assert(u != v && u != q && v != q && "Must use different memory");
1420   assert(n>1 && "n must be > 1");
1421 
1422   // b denotes the base of the number system. In our case b is 2^32.
1423   const uint64_t b = uint64_t(1) << 32;
1424 
1425   DEBUG(dbgs() << "KnuthDiv: m=" << m << " n=" << n << '\n');
1426   DEBUG(dbgs() << "KnuthDiv: original:");
1427   DEBUG(for (int i = m+n; i >=0; i--) dbgs() << " " << u[i]);
1428   DEBUG(dbgs() << " by");
1429   DEBUG(for (int i = n; i >0; i--) dbgs() << " " << v[i-1]);
1430   DEBUG(dbgs() << '\n');
1431   // D1. [Normalize.] Set d = b / (v[n-1] + 1) and multiply all the digits of
1432   // u and v by d. Note that we have taken Knuth's advice here to use a power
1433   // of 2 value for d such that d * v[n-1] >= b/2 (b is the base). A power of
1434   // 2 allows us to shift instead of multiply and it is easy to determine the
1435   // shift amount from the leading zeros.  We are basically normalizing the u
1436   // and v so that its high bits are shifted to the top of v's range without
1437   // overflow. Note that this can require an extra word in u so that u must
1438   // be of length m+n+1.
1439   unsigned shift = countLeadingZeros(v[n-1]);
1440   unsigned v_carry = 0;
1441   unsigned u_carry = 0;
1442   if (shift) {
1443     for (unsigned i = 0; i < m+n; ++i) {
1444       unsigned u_tmp = u[i] >> (32 - shift);
1445       u[i] = (u[i] << shift) | u_carry;
1446       u_carry = u_tmp;
1447     }
1448     for (unsigned i = 0; i < n; ++i) {
1449       unsigned v_tmp = v[i] >> (32 - shift);
1450       v[i] = (v[i] << shift) | v_carry;
1451       v_carry = v_tmp;
1452     }
1453   }
1454   u[m+n] = u_carry;
1455 
1456   DEBUG(dbgs() << "KnuthDiv:   normal:");
1457   DEBUG(for (int i = m+n; i >=0; i--) dbgs() << " " << u[i]);
1458   DEBUG(dbgs() << " by");
1459   DEBUG(for (int i = n; i >0; i--) dbgs() << " " << v[i-1]);
1460   DEBUG(dbgs() << '\n');
1461 
1462   // D2. [Initialize j.]  Set j to m. This is the loop counter over the places.
1463   int j = m;
1464   do {
1465     DEBUG(dbgs() << "KnuthDiv: quotient digit #" << j << '\n');
1466     // D3. [Calculate q'.].
1467     //     Set qp = (u[j+n]*b + u[j+n-1]) / v[n-1]. (qp=qprime=q')
1468     //     Set rp = (u[j+n]*b + u[j+n-1]) % v[n-1]. (rp=rprime=r')
1469     // Now test if qp == b or qp*v[n-2] > b*rp + u[j+n-2]; if so, decrease
1470     // qp by 1, inrease rp by v[n-1], and repeat this test if rp < b. The test
1471     // on v[n-2] determines at high speed most of the cases in which the trial
1472     // value qp is one too large, and it eliminates all cases where qp is two
1473     // too large.
1474     uint64_t dividend = ((uint64_t(u[j+n]) << 32) + u[j+n-1]);
1475     DEBUG(dbgs() << "KnuthDiv: dividend == " << dividend << '\n');
1476     uint64_t qp = dividend / v[n-1];
1477     uint64_t rp = dividend % v[n-1];
1478     if (qp == b || qp*v[n-2] > b*rp + u[j+n-2]) {
1479       qp--;
1480       rp += v[n-1];
1481       if (rp < b && (qp == b || qp*v[n-2] > b*rp + u[j+n-2]))
1482         qp--;
1483     }
1484     DEBUG(dbgs() << "KnuthDiv: qp == " << qp << ", rp == " << rp << '\n');
1485 
1486     // D4. [Multiply and subtract.] Replace (u[j+n]u[j+n-1]...u[j]) with
1487     // (u[j+n]u[j+n-1]..u[j]) - qp * (v[n-1]...v[1]v[0]). This computation
1488     // consists of a simple multiplication by a one-place number, combined with
1489     // a subtraction.
1490     // The digits (u[j+n]...u[j]) should be kept positive; if the result of
1491     // this step is actually negative, (u[j+n]...u[j]) should be left as the
1492     // true value plus b**(n+1), namely as the b's complement of
1493     // the true value, and a "borrow" to the left should be remembered.
1494     int64_t borrow = 0;
1495     for (unsigned i = 0; i < n; ++i) {
1496       uint64_t p = uint64_t(qp) * uint64_t(v[i]);
1497       int64_t subres = int64_t(u[j+i]) - borrow - (unsigned)p;
1498       u[j+i] = (unsigned)subres;
1499       borrow = (p >> 32) - (subres >> 32);
1500       DEBUG(dbgs() << "KnuthDiv: u[j+i] = " << u[j+i]
1501                    << ", borrow = " << borrow << '\n');
1502     }
1503     bool isNeg = u[j+n] < borrow;
1504     u[j+n] -= (unsigned)borrow;
1505 
1506     DEBUG(dbgs() << "KnuthDiv: after subtraction:");
1507     DEBUG(for (int i = m+n; i >=0; i--) dbgs() << " " << u[i]);
1508     DEBUG(dbgs() << '\n');
1509 
1510     // D5. [Test remainder.] Set q[j] = qp. If the result of step D4 was
1511     // negative, go to step D6; otherwise go on to step D7.
1512     q[j] = (unsigned)qp;
1513     if (isNeg) {
1514       // D6. [Add back]. The probability that this step is necessary is very
1515       // small, on the order of only 2/b. Make sure that test data accounts for
1516       // this possibility. Decrease q[j] by 1
1517       q[j]--;
1518       // and add (0v[n-1]...v[1]v[0]) to (u[j+n]u[j+n-1]...u[j+1]u[j]).
1519       // A carry will occur to the left of u[j+n], and it should be ignored
1520       // since it cancels with the borrow that occurred in D4.
1521       bool carry = false;
1522       for (unsigned i = 0; i < n; i++) {
1523         unsigned limit = std::min(u[j+i],v[i]);
1524         u[j+i] += v[i] + carry;
1525         carry = u[j+i] < limit || (carry && u[j+i] == limit);
1526       }
1527       u[j+n] += carry;
1528     }
1529     DEBUG(dbgs() << "KnuthDiv: after correction:");
1530     DEBUG(for (int i = m+n; i >=0; i--) dbgs() << " " << u[i]);
1531     DEBUG(dbgs() << "\nKnuthDiv: digit result = " << q[j] << '\n');
1532 
1533   // D7. [Loop on j.]  Decrease j by one. Now if j >= 0, go back to D3.
1534   } while (--j >= 0);
1535 
1536   DEBUG(dbgs() << "KnuthDiv: quotient:");
1537   DEBUG(for (int i = m; i >=0; i--) dbgs() <<" " << q[i]);
1538   DEBUG(dbgs() << '\n');
1539 
1540   // D8. [Unnormalize]. Now q[...] is the desired quotient, and the desired
1541   // remainder may be obtained by dividing u[...] by d. If r is non-null we
1542   // compute the remainder (urem uses this).
1543   if (r) {
1544     // The value d is expressed by the "shift" value above since we avoided
1545     // multiplication by d by using a shift left. So, all we have to do is
1546     // shift right here.
1547     if (shift) {
1548       unsigned carry = 0;
1549       DEBUG(dbgs() << "KnuthDiv: remainder:");
1550       for (int i = n-1; i >= 0; i--) {
1551         r[i] = (u[i] >> shift) | carry;
1552         carry = u[i] << (32 - shift);
1553         DEBUG(dbgs() << " " << r[i]);
1554       }
1555     } else {
1556       for (int i = n-1; i >= 0; i--) {
1557         r[i] = u[i];
1558         DEBUG(dbgs() << " " << r[i]);
1559       }
1560     }
1561     DEBUG(dbgs() << '\n');
1562   }
1563   DEBUG(dbgs() << '\n');
1564 }
1565 
1566 void APInt::divide(const APInt &LHS, unsigned lhsWords, const APInt &RHS,
1567                    unsigned rhsWords, APInt *Quotient, APInt *Remainder) {
1568   assert(lhsWords >= rhsWords && "Fractional result");
1569 
1570   // First, compose the values into an array of 32-bit words instead of
1571   // 64-bit words. This is a necessity of both the "short division" algorithm
1572   // and the Knuth "classical algorithm" which requires there to be native
1573   // operations for +, -, and * on an m bit value with an m*2 bit result. We
1574   // can't use 64-bit operands here because we don't have native results of
1575   // 128-bits. Furthermore, casting the 64-bit values to 32-bit values won't
1576   // work on large-endian machines.
1577   uint64_t mask = ~0ull >> (sizeof(unsigned)*CHAR_BIT);
1578   unsigned n = rhsWords * 2;
1579   unsigned m = (lhsWords * 2) - n;
1580 
1581   // Allocate space for the temporary values we need either on the stack, if
1582   // it will fit, or on the heap if it won't.
1583   unsigned SPACE[128];
1584   unsigned *U = nullptr;
1585   unsigned *V = nullptr;
1586   unsigned *Q = nullptr;
1587   unsigned *R = nullptr;
1588   if ((Remainder?4:3)*n+2*m+1 <= 128) {
1589     U = &SPACE[0];
1590     V = &SPACE[m+n+1];
1591     Q = &SPACE[(m+n+1) + n];
1592     if (Remainder)
1593       R = &SPACE[(m+n+1) + n + (m+n)];
1594   } else {
1595     U = new unsigned[m + n + 1];
1596     V = new unsigned[n];
1597     Q = new unsigned[m+n];
1598     if (Remainder)
1599       R = new unsigned[n];
1600   }
1601 
1602   // Initialize the dividend
1603   memset(U, 0, (m+n+1)*sizeof(unsigned));
1604   for (unsigned i = 0; i < lhsWords; ++i) {
1605     uint64_t tmp = (LHS.getNumWords() == 1 ? LHS.VAL : LHS.pVal[i]);
1606     U[i * 2] = (unsigned)(tmp & mask);
1607     U[i * 2 + 1] = (unsigned)(tmp >> (sizeof(unsigned)*CHAR_BIT));
1608   }
1609   U[m+n] = 0; // this extra word is for "spill" in the Knuth algorithm.
1610 
1611   // Initialize the divisor
1612   memset(V, 0, (n)*sizeof(unsigned));
1613   for (unsigned i = 0; i < rhsWords; ++i) {
1614     uint64_t tmp = (RHS.getNumWords() == 1 ? RHS.VAL : RHS.pVal[i]);
1615     V[i * 2] = (unsigned)(tmp & mask);
1616     V[i * 2 + 1] = (unsigned)(tmp >> (sizeof(unsigned)*CHAR_BIT));
1617   }
1618 
1619   // initialize the quotient and remainder
1620   memset(Q, 0, (m+n) * sizeof(unsigned));
1621   if (Remainder)
1622     memset(R, 0, n * sizeof(unsigned));
1623 
1624   // Now, adjust m and n for the Knuth division. n is the number of words in
1625   // the divisor. m is the number of words by which the dividend exceeds the
1626   // divisor (i.e. m+n is the length of the dividend). These sizes must not
1627   // contain any zero words or the Knuth algorithm fails.
1628   for (unsigned i = n; i > 0 && V[i-1] == 0; i--) {
1629     n--;
1630     m++;
1631   }
1632   for (unsigned i = m+n; i > 0 && U[i-1] == 0; i--)
1633     m--;
1634 
1635   // If we're left with only a single word for the divisor, Knuth doesn't work
1636   // so we implement the short division algorithm here. This is much simpler
1637   // and faster because we are certain that we can divide a 64-bit quantity
1638   // by a 32-bit quantity at hardware speed and short division is simply a
1639   // series of such operations. This is just like doing short division but we
1640   // are using base 2^32 instead of base 10.
1641   assert(n != 0 && "Divide by zero?");
1642   if (n == 1) {
1643     unsigned divisor = V[0];
1644     unsigned remainder = 0;
1645     for (int i = m+n-1; i >= 0; i--) {
1646       uint64_t partial_dividend = uint64_t(remainder) << 32 | U[i];
1647       if (partial_dividend == 0) {
1648         Q[i] = 0;
1649         remainder = 0;
1650       } else if (partial_dividend < divisor) {
1651         Q[i] = 0;
1652         remainder = (unsigned)partial_dividend;
1653       } else if (partial_dividend == divisor) {
1654         Q[i] = 1;
1655         remainder = 0;
1656       } else {
1657         Q[i] = (unsigned)(partial_dividend / divisor);
1658         remainder = (unsigned)(partial_dividend - (Q[i] * divisor));
1659       }
1660     }
1661     if (R)
1662       R[0] = remainder;
1663   } else {
1664     // Now we're ready to invoke the Knuth classical divide algorithm. In this
1665     // case n > 1.
1666     KnuthDiv(U, V, Q, R, m, n);
1667   }
1668 
1669   // If the caller wants the quotient
1670   if (Quotient) {
1671     // Set up the Quotient value's memory.
1672     if (Quotient->BitWidth != LHS.BitWidth) {
1673       if (Quotient->isSingleWord())
1674         Quotient->VAL = 0;
1675       else
1676         delete [] Quotient->pVal;
1677       Quotient->BitWidth = LHS.BitWidth;
1678       if (!Quotient->isSingleWord())
1679         Quotient->pVal = getClearedMemory(Quotient->getNumWords());
1680     } else
1681       Quotient->clearAllBits();
1682 
1683     // The quotient is in Q. Reconstitute the quotient into Quotient's low
1684     // order words.
1685     // This case is currently dead as all users of divide() handle trivial cases
1686     // earlier.
1687     if (lhsWords == 1) {
1688       uint64_t tmp =
1689         uint64_t(Q[0]) | (uint64_t(Q[1]) << (APINT_BITS_PER_WORD / 2));
1690       if (Quotient->isSingleWord())
1691         Quotient->VAL = tmp;
1692       else
1693         Quotient->pVal[0] = tmp;
1694     } else {
1695       assert(!Quotient->isSingleWord() && "Quotient APInt not large enough");
1696       for (unsigned i = 0; i < lhsWords; ++i)
1697         Quotient->pVal[i] =
1698           uint64_t(Q[i*2]) | (uint64_t(Q[i*2+1]) << (APINT_BITS_PER_WORD / 2));
1699     }
1700   }
1701 
1702   // If the caller wants the remainder
1703   if (Remainder) {
1704     // Set up the Remainder value's memory.
1705     if (Remainder->BitWidth != RHS.BitWidth) {
1706       if (Remainder->isSingleWord())
1707         Remainder->VAL = 0;
1708       else
1709         delete [] Remainder->pVal;
1710       Remainder->BitWidth = RHS.BitWidth;
1711       if (!Remainder->isSingleWord())
1712         Remainder->pVal = getClearedMemory(Remainder->getNumWords());
1713     } else
1714       Remainder->clearAllBits();
1715 
1716     // The remainder is in R. Reconstitute the remainder into Remainder's low
1717     // order words.
1718     if (rhsWords == 1) {
1719       uint64_t tmp =
1720         uint64_t(R[0]) | (uint64_t(R[1]) << (APINT_BITS_PER_WORD / 2));
1721       if (Remainder->isSingleWord())
1722         Remainder->VAL = tmp;
1723       else
1724         Remainder->pVal[0] = tmp;
1725     } else {
1726       assert(!Remainder->isSingleWord() && "Remainder APInt not large enough");
1727       for (unsigned i = 0; i < rhsWords; ++i)
1728         Remainder->pVal[i] =
1729           uint64_t(R[i*2]) | (uint64_t(R[i*2+1]) << (APINT_BITS_PER_WORD / 2));
1730     }
1731   }
1732 
1733   // Clean up the memory we allocated.
1734   if (U != &SPACE[0]) {
1735     delete [] U;
1736     delete [] V;
1737     delete [] Q;
1738     delete [] R;
1739   }
1740 }
1741 
1742 APInt APInt::udiv(const APInt& RHS) const {
1743   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
1744 
1745   // First, deal with the easy case
1746   if (isSingleWord()) {
1747     assert(RHS.VAL != 0 && "Divide by zero?");
1748     return APInt(BitWidth, VAL / RHS.VAL);
1749   }
1750 
1751   // Get some facts about the LHS and RHS number of bits and words
1752   unsigned rhsBits = RHS.getActiveBits();
1753   unsigned rhsWords = !rhsBits ? 0 : (APInt::whichWord(rhsBits - 1) + 1);
1754   assert(rhsWords && "Divided by zero???");
1755   unsigned lhsBits = this->getActiveBits();
1756   unsigned lhsWords = !lhsBits ? 0 : (APInt::whichWord(lhsBits - 1) + 1);
1757 
1758   // Deal with some degenerate cases
1759   if (!lhsWords)
1760     // 0 / X ===> 0
1761     return APInt(BitWidth, 0);
1762   else if (lhsWords < rhsWords || this->ult(RHS)) {
1763     // X / Y ===> 0, iff X < Y
1764     return APInt(BitWidth, 0);
1765   } else if (*this == RHS) {
1766     // X / X ===> 1
1767     return APInt(BitWidth, 1);
1768   } else if (lhsWords == 1 && rhsWords == 1) {
1769     // All high words are zero, just use native divide
1770     return APInt(BitWidth, this->pVal[0] / RHS.pVal[0]);
1771   }
1772 
1773   // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1774   APInt Quotient(1,0); // to hold result.
1775   divide(*this, lhsWords, RHS, rhsWords, &Quotient, nullptr);
1776   return Quotient;
1777 }
1778 
1779 APInt APInt::sdiv(const APInt &RHS) const {
1780   if (isNegative()) {
1781     if (RHS.isNegative())
1782       return (-(*this)).udiv(-RHS);
1783     return -((-(*this)).udiv(RHS));
1784   }
1785   if (RHS.isNegative())
1786     return -(this->udiv(-RHS));
1787   return this->udiv(RHS);
1788 }
1789 
1790 APInt APInt::urem(const APInt& RHS) const {
1791   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
1792   if (isSingleWord()) {
1793     assert(RHS.VAL != 0 && "Remainder by zero?");
1794     return APInt(BitWidth, VAL % RHS.VAL);
1795   }
1796 
1797   // Get some facts about the LHS
1798   unsigned lhsBits = getActiveBits();
1799   unsigned lhsWords = !lhsBits ? 0 : (whichWord(lhsBits - 1) + 1);
1800 
1801   // Get some facts about the RHS
1802   unsigned rhsBits = RHS.getActiveBits();
1803   unsigned rhsWords = !rhsBits ? 0 : (APInt::whichWord(rhsBits - 1) + 1);
1804   assert(rhsWords && "Performing remainder operation by zero ???");
1805 
1806   // Check the degenerate cases
1807   if (lhsWords == 0) {
1808     // 0 % Y ===> 0
1809     return APInt(BitWidth, 0);
1810   } else if (lhsWords < rhsWords || this->ult(RHS)) {
1811     // X % Y ===> X, iff X < Y
1812     return *this;
1813   } else if (*this == RHS) {
1814     // X % X == 0;
1815     return APInt(BitWidth, 0);
1816   } else if (lhsWords == 1) {
1817     // All high words are zero, just use native remainder
1818     return APInt(BitWidth, pVal[0] % RHS.pVal[0]);
1819   }
1820 
1821   // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1822   APInt Remainder(1,0);
1823   divide(*this, lhsWords, RHS, rhsWords, nullptr, &Remainder);
1824   return Remainder;
1825 }
1826 
1827 APInt APInt::srem(const APInt &RHS) const {
1828   if (isNegative()) {
1829     if (RHS.isNegative())
1830       return -((-(*this)).urem(-RHS));
1831     return -((-(*this)).urem(RHS));
1832   }
1833   if (RHS.isNegative())
1834     return this->urem(-RHS);
1835   return this->urem(RHS);
1836 }
1837 
1838 void APInt::udivrem(const APInt &LHS, const APInt &RHS,
1839                     APInt &Quotient, APInt &Remainder) {
1840   assert(LHS.BitWidth == RHS.BitWidth && "Bit widths must be the same");
1841 
1842   // First, deal with the easy case
1843   if (LHS.isSingleWord()) {
1844     assert(RHS.VAL != 0 && "Divide by zero?");
1845     uint64_t QuotVal = LHS.VAL / RHS.VAL;
1846     uint64_t RemVal = LHS.VAL % RHS.VAL;
1847     Quotient = APInt(LHS.BitWidth, QuotVal);
1848     Remainder = APInt(LHS.BitWidth, RemVal);
1849     return;
1850   }
1851 
1852   // Get some size facts about the dividend and divisor
1853   unsigned lhsBits  = LHS.getActiveBits();
1854   unsigned lhsWords = !lhsBits ? 0 : (APInt::whichWord(lhsBits - 1) + 1);
1855   unsigned rhsBits  = RHS.getActiveBits();
1856   unsigned rhsWords = !rhsBits ? 0 : (APInt::whichWord(rhsBits - 1) + 1);
1857 
1858   // Check the degenerate cases
1859   if (lhsWords == 0) {
1860     Quotient = 0;                // 0 / Y ===> 0
1861     Remainder = 0;               // 0 % Y ===> 0
1862     return;
1863   }
1864 
1865   if (lhsWords < rhsWords || LHS.ult(RHS)) {
1866     Remainder = LHS;            // X % Y ===> X, iff X < Y
1867     Quotient = 0;               // X / Y ===> 0, iff X < Y
1868     return;
1869   }
1870 
1871   if (LHS == RHS) {
1872     Quotient  = 1;              // X / X ===> 1
1873     Remainder = 0;              // X % X ===> 0;
1874     return;
1875   }
1876 
1877   if (lhsWords == 1 && rhsWords == 1) {
1878     // There is only one word to consider so use the native versions.
1879     uint64_t lhsValue = LHS.isSingleWord() ? LHS.VAL : LHS.pVal[0];
1880     uint64_t rhsValue = RHS.isSingleWord() ? RHS.VAL : RHS.pVal[0];
1881     Quotient = APInt(LHS.getBitWidth(), lhsValue / rhsValue);
1882     Remainder = APInt(LHS.getBitWidth(), lhsValue % rhsValue);
1883     return;
1884   }
1885 
1886   // Okay, lets do it the long way
1887   divide(LHS, lhsWords, RHS, rhsWords, &Quotient, &Remainder);
1888 }
1889 
1890 void APInt::sdivrem(const APInt &LHS, const APInt &RHS,
1891                     APInt &Quotient, APInt &Remainder) {
1892   if (LHS.isNegative()) {
1893     if (RHS.isNegative())
1894       APInt::udivrem(-LHS, -RHS, Quotient, Remainder);
1895     else {
1896       APInt::udivrem(-LHS, RHS, Quotient, Remainder);
1897       Quotient = -Quotient;
1898     }
1899     Remainder = -Remainder;
1900   } else if (RHS.isNegative()) {
1901     APInt::udivrem(LHS, -RHS, Quotient, Remainder);
1902     Quotient = -Quotient;
1903   } else {
1904     APInt::udivrem(LHS, RHS, Quotient, Remainder);
1905   }
1906 }
1907 
1908 APInt APInt::sadd_ov(const APInt &RHS, bool &Overflow) const {
1909   APInt Res = *this+RHS;
1910   Overflow = isNonNegative() == RHS.isNonNegative() &&
1911              Res.isNonNegative() != isNonNegative();
1912   return Res;
1913 }
1914 
1915 APInt APInt::uadd_ov(const APInt &RHS, bool &Overflow) const {
1916   APInt Res = *this+RHS;
1917   Overflow = Res.ult(RHS);
1918   return Res;
1919 }
1920 
1921 APInt APInt::ssub_ov(const APInt &RHS, bool &Overflow) const {
1922   APInt Res = *this - RHS;
1923   Overflow = isNonNegative() != RHS.isNonNegative() &&
1924              Res.isNonNegative() != isNonNegative();
1925   return Res;
1926 }
1927 
1928 APInt APInt::usub_ov(const APInt &RHS, bool &Overflow) const {
1929   APInt Res = *this-RHS;
1930   Overflow = Res.ugt(*this);
1931   return Res;
1932 }
1933 
1934 APInt APInt::sdiv_ov(const APInt &RHS, bool &Overflow) const {
1935   // MININT/-1  -->  overflow.
1936   Overflow = isMinSignedValue() && RHS.isAllOnesValue();
1937   return sdiv(RHS);
1938 }
1939 
1940 APInt APInt::smul_ov(const APInt &RHS, bool &Overflow) const {
1941   APInt Res = *this * RHS;
1942 
1943   if (*this != 0 && RHS != 0)
1944     Overflow = Res.sdiv(RHS) != *this || Res.sdiv(*this) != RHS;
1945   else
1946     Overflow = false;
1947   return Res;
1948 }
1949 
1950 APInt APInt::umul_ov(const APInt &RHS, bool &Overflow) const {
1951   APInt Res = *this * RHS;
1952 
1953   if (*this != 0 && RHS != 0)
1954     Overflow = Res.udiv(RHS) != *this || Res.udiv(*this) != RHS;
1955   else
1956     Overflow = false;
1957   return Res;
1958 }
1959 
1960 APInt APInt::sshl_ov(const APInt &ShAmt, bool &Overflow) const {
1961   Overflow = ShAmt.uge(getBitWidth());
1962   if (Overflow)
1963     return APInt(BitWidth, 0);
1964 
1965   if (isNonNegative()) // Don't allow sign change.
1966     Overflow = ShAmt.uge(countLeadingZeros());
1967   else
1968     Overflow = ShAmt.uge(countLeadingOnes());
1969 
1970   return *this << ShAmt;
1971 }
1972 
1973 APInt APInt::ushl_ov(const APInt &ShAmt, bool &Overflow) const {
1974   Overflow = ShAmt.uge(getBitWidth());
1975   if (Overflow)
1976     return APInt(BitWidth, 0);
1977 
1978   Overflow = ShAmt.ugt(countLeadingZeros());
1979 
1980   return *this << ShAmt;
1981 }
1982 
1983 
1984 
1985 
1986 void APInt::fromString(unsigned numbits, StringRef str, uint8_t radix) {
1987   // Check our assumptions here
1988   assert(!str.empty() && "Invalid string length");
1989   assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 ||
1990           radix == 36) &&
1991          "Radix should be 2, 8, 10, 16, or 36!");
1992 
1993   StringRef::iterator p = str.begin();
1994   size_t slen = str.size();
1995   bool isNeg = *p == '-';
1996   if (*p == '-' || *p == '+') {
1997     p++;
1998     slen--;
1999     assert(slen && "String is only a sign, needs a value.");
2000   }
2001   assert((slen <= numbits || radix != 2) && "Insufficient bit width");
2002   assert(((slen-1)*3 <= numbits || radix != 8) && "Insufficient bit width");
2003   assert(((slen-1)*4 <= numbits || radix != 16) && "Insufficient bit width");
2004   assert((((slen-1)*64)/22 <= numbits || radix != 10) &&
2005          "Insufficient bit width");
2006 
2007   // Allocate memory
2008   if (!isSingleWord())
2009     pVal = getClearedMemory(getNumWords());
2010 
2011   // Figure out if we can shift instead of multiply
2012   unsigned shift = (radix == 16 ? 4 : radix == 8 ? 3 : radix == 2 ? 1 : 0);
2013 
2014   // Set up an APInt for the radix multiplier outside the loop so we don't
2015   // constantly construct/destruct it.
2016   APInt apradix(getBitWidth(), radix);
2017 
2018   // Enter digit traversal loop
2019   for (StringRef::iterator e = str.end(); p != e; ++p) {
2020     unsigned digit = getDigit(*p, radix);
2021     assert(digit < radix && "Invalid character in digit string");
2022 
2023     // Shift or multiply the value by the radix
2024     if (slen > 1) {
2025       if (shift)
2026         *this <<= shift;
2027       else
2028         *this *= apradix;
2029     }
2030 
2031     // Add in the digit we just interpreted
2032     *this += digit;
2033   }
2034   // If its negative, put it in two's complement form
2035   if (isNeg) {
2036     --(*this);
2037     this->flipAllBits();
2038   }
2039 }
2040 
2041 void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix,
2042                      bool Signed, bool formatAsCLiteral) const {
2043   assert((Radix == 10 || Radix == 8 || Radix == 16 || Radix == 2 ||
2044           Radix == 36) &&
2045          "Radix should be 2, 8, 10, 16, or 36!");
2046 
2047   const char *Prefix = "";
2048   if (formatAsCLiteral) {
2049     switch (Radix) {
2050       case 2:
2051         // Binary literals are a non-standard extension added in gcc 4.3:
2052         // http://gcc.gnu.org/onlinedocs/gcc-4.3.0/gcc/Binary-constants.html
2053         Prefix = "0b";
2054         break;
2055       case 8:
2056         Prefix = "0";
2057         break;
2058       case 10:
2059         break; // No prefix
2060       case 16:
2061         Prefix = "0x";
2062         break;
2063       default:
2064         llvm_unreachable("Invalid radix!");
2065     }
2066   }
2067 
2068   // First, check for a zero value and just short circuit the logic below.
2069   if (*this == 0) {
2070     while (*Prefix) {
2071       Str.push_back(*Prefix);
2072       ++Prefix;
2073     };
2074     Str.push_back('0');
2075     return;
2076   }
2077 
2078   static const char Digits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
2079 
2080   if (isSingleWord()) {
2081     char Buffer[65];
2082     char *BufPtr = Buffer+65;
2083 
2084     uint64_t N;
2085     if (!Signed) {
2086       N = getZExtValue();
2087     } else {
2088       int64_t I = getSExtValue();
2089       if (I >= 0) {
2090         N = I;
2091       } else {
2092         Str.push_back('-');
2093         N = -(uint64_t)I;
2094       }
2095     }
2096 
2097     while (*Prefix) {
2098       Str.push_back(*Prefix);
2099       ++Prefix;
2100     };
2101 
2102     while (N) {
2103       *--BufPtr = Digits[N % Radix];
2104       N /= Radix;
2105     }
2106     Str.append(BufPtr, Buffer+65);
2107     return;
2108   }
2109 
2110   APInt Tmp(*this);
2111 
2112   if (Signed && isNegative()) {
2113     // They want to print the signed version and it is a negative value
2114     // Flip the bits and add one to turn it into the equivalent positive
2115     // value and put a '-' in the result.
2116     Tmp.flipAllBits();
2117     ++Tmp;
2118     Str.push_back('-');
2119   }
2120 
2121   while (*Prefix) {
2122     Str.push_back(*Prefix);
2123     ++Prefix;
2124   };
2125 
2126   // We insert the digits backward, then reverse them to get the right order.
2127   unsigned StartDig = Str.size();
2128 
2129   // For the 2, 8 and 16 bit cases, we can just shift instead of divide
2130   // because the number of bits per digit (1, 3 and 4 respectively) divides
2131   // equally.  We just shift until the value is zero.
2132   if (Radix == 2 || Radix == 8 || Radix == 16) {
2133     // Just shift tmp right for each digit width until it becomes zero
2134     unsigned ShiftAmt = (Radix == 16 ? 4 : (Radix == 8 ? 3 : 1));
2135     unsigned MaskAmt = Radix - 1;
2136 
2137     while (Tmp != 0) {
2138       unsigned Digit = unsigned(Tmp.getRawData()[0]) & MaskAmt;
2139       Str.push_back(Digits[Digit]);
2140       Tmp.lshrInPlace(ShiftAmt);
2141     }
2142   } else {
2143     APInt divisor(Radix == 10? 4 : 8, Radix);
2144     while (Tmp != 0) {
2145       APInt APdigit(1, 0);
2146       APInt tmp2(Tmp.getBitWidth(), 0);
2147       divide(Tmp, Tmp.getNumWords(), divisor, divisor.getNumWords(), &tmp2,
2148              &APdigit);
2149       unsigned Digit = (unsigned)APdigit.getZExtValue();
2150       assert(Digit < Radix && "divide failed");
2151       Str.push_back(Digits[Digit]);
2152       Tmp = tmp2;
2153     }
2154   }
2155 
2156   // Reverse the digits before returning.
2157   std::reverse(Str.begin()+StartDig, Str.end());
2158 }
2159 
2160 /// Returns the APInt as a std::string. Note that this is an inefficient method.
2161 /// It is better to pass in a SmallVector/SmallString to the methods above.
2162 std::string APInt::toString(unsigned Radix = 10, bool Signed = true) const {
2163   SmallString<40> S;
2164   toString(S, Radix, Signed, /* formatAsCLiteral = */false);
2165   return S.str();
2166 }
2167 
2168 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2169 LLVM_DUMP_METHOD void APInt::dump() const {
2170   SmallString<40> S, U;
2171   this->toStringUnsigned(U);
2172   this->toStringSigned(S);
2173   dbgs() << "APInt(" << BitWidth << "b, "
2174          << U << "u " << S << "s)\n";
2175 }
2176 #endif
2177 
2178 void APInt::print(raw_ostream &OS, bool isSigned) const {
2179   SmallString<40> S;
2180   this->toString(S, 10, isSigned, /* formatAsCLiteral = */false);
2181   OS << S;
2182 }
2183 
2184 // This implements a variety of operations on a representation of
2185 // arbitrary precision, two's-complement, bignum integer values.
2186 
2187 // Assumed by lowHalf, highHalf, partMSB and partLSB.  A fairly safe
2188 // and unrestricting assumption.
2189 static_assert(APInt::APINT_BITS_PER_WORD % 2 == 0,
2190               "Part width must be divisible by 2!");
2191 
2192 /* Some handy functions local to this file.  */
2193 
2194 /* Returns the integer part with the least significant BITS set.
2195    BITS cannot be zero.  */
2196 static inline APInt::WordType lowBitMask(unsigned bits) {
2197   assert(bits != 0 && bits <= APInt::APINT_BITS_PER_WORD);
2198 
2199   return ~(APInt::WordType) 0 >> (APInt::APINT_BITS_PER_WORD - bits);
2200 }
2201 
2202 /* Returns the value of the lower half of PART.  */
2203 static inline APInt::WordType lowHalf(APInt::WordType part) {
2204   return part & lowBitMask(APInt::APINT_BITS_PER_WORD / 2);
2205 }
2206 
2207 /* Returns the value of the upper half of PART.  */
2208 static inline APInt::WordType highHalf(APInt::WordType part) {
2209   return part >> (APInt::APINT_BITS_PER_WORD / 2);
2210 }
2211 
2212 /* Returns the bit number of the most significant set bit of a part.
2213    If the input number has no bits set -1U is returned.  */
2214 static unsigned partMSB(APInt::WordType value) {
2215   return findLastSet(value, ZB_Max);
2216 }
2217 
2218 /* Returns the bit number of the least significant set bit of a
2219    part.  If the input number has no bits set -1U is returned.  */
2220 static unsigned partLSB(APInt::WordType value) {
2221   return findFirstSet(value, ZB_Max);
2222 }
2223 
2224 /* Sets the least significant part of a bignum to the input value, and
2225    zeroes out higher parts.  */
2226 void APInt::tcSet(WordType *dst, WordType part, unsigned parts) {
2227   assert(parts > 0);
2228 
2229   dst[0] = part;
2230   for (unsigned i = 1; i < parts; i++)
2231     dst[i] = 0;
2232 }
2233 
2234 /* Assign one bignum to another.  */
2235 void APInt::tcAssign(WordType *dst, const WordType *src, unsigned parts) {
2236   for (unsigned i = 0; i < parts; i++)
2237     dst[i] = src[i];
2238 }
2239 
2240 /* Returns true if a bignum is zero, false otherwise.  */
2241 bool APInt::tcIsZero(const WordType *src, unsigned parts) {
2242   for (unsigned i = 0; i < parts; i++)
2243     if (src[i])
2244       return false;
2245 
2246   return true;
2247 }
2248 
2249 /* Extract the given bit of a bignum; returns 0 or 1.  */
2250 int APInt::tcExtractBit(const WordType *parts, unsigned bit) {
2251   return (parts[whichWord(bit)] & maskBit(bit)) != 0;
2252 }
2253 
2254 /* Set the given bit of a bignum. */
2255 void APInt::tcSetBit(WordType *parts, unsigned bit) {
2256   parts[whichWord(bit)] |= maskBit(bit);
2257 }
2258 
2259 /* Clears the given bit of a bignum. */
2260 void APInt::tcClearBit(WordType *parts, unsigned bit) {
2261   parts[whichWord(bit)] &= ~maskBit(bit);
2262 }
2263 
2264 /* Returns the bit number of the least significant set bit of a
2265    number.  If the input number has no bits set -1U is returned.  */
2266 unsigned APInt::tcLSB(const WordType *parts, unsigned n) {
2267   for (unsigned i = 0; i < n; i++) {
2268     if (parts[i] != 0) {
2269       unsigned lsb = partLSB(parts[i]);
2270 
2271       return lsb + i * APINT_BITS_PER_WORD;
2272     }
2273   }
2274 
2275   return -1U;
2276 }
2277 
2278 /* Returns the bit number of the most significant set bit of a number.
2279    If the input number has no bits set -1U is returned.  */
2280 unsigned APInt::tcMSB(const WordType *parts, unsigned n) {
2281   do {
2282     --n;
2283 
2284     if (parts[n] != 0) {
2285       unsigned msb = partMSB(parts[n]);
2286 
2287       return msb + n * APINT_BITS_PER_WORD;
2288     }
2289   } while (n);
2290 
2291   return -1U;
2292 }
2293 
2294 /* Copy the bit vector of width srcBITS from SRC, starting at bit
2295    srcLSB, to DST, of dstCOUNT parts, such that the bit srcLSB becomes
2296    the least significant bit of DST.  All high bits above srcBITS in
2297    DST are zero-filled.  */
2298 void
2299 APInt::tcExtract(WordType *dst, unsigned dstCount, const WordType *src,
2300                  unsigned srcBits, unsigned srcLSB) {
2301   unsigned dstParts = (srcBits + APINT_BITS_PER_WORD - 1) / APINT_BITS_PER_WORD;
2302   assert(dstParts <= dstCount);
2303 
2304   unsigned firstSrcPart = srcLSB / APINT_BITS_PER_WORD;
2305   tcAssign (dst, src + firstSrcPart, dstParts);
2306 
2307   unsigned shift = srcLSB % APINT_BITS_PER_WORD;
2308   tcShiftRight (dst, dstParts, shift);
2309 
2310   /* We now have (dstParts * APINT_BITS_PER_WORD - shift) bits from SRC
2311      in DST.  If this is less that srcBits, append the rest, else
2312      clear the high bits.  */
2313   unsigned n = dstParts * APINT_BITS_PER_WORD - shift;
2314   if (n < srcBits) {
2315     WordType mask = lowBitMask (srcBits - n);
2316     dst[dstParts - 1] |= ((src[firstSrcPart + dstParts] & mask)
2317                           << n % APINT_BITS_PER_WORD);
2318   } else if (n > srcBits) {
2319     if (srcBits % APINT_BITS_PER_WORD)
2320       dst[dstParts - 1] &= lowBitMask (srcBits % APINT_BITS_PER_WORD);
2321   }
2322 
2323   /* Clear high parts.  */
2324   while (dstParts < dstCount)
2325     dst[dstParts++] = 0;
2326 }
2327 
2328 /* DST += RHS + C where C is zero or one.  Returns the carry flag.  */
2329 APInt::WordType APInt::tcAdd(WordType *dst, const WordType *rhs,
2330                              WordType c, unsigned parts) {
2331   assert(c <= 1);
2332 
2333   for (unsigned i = 0; i < parts; i++) {
2334     WordType l = dst[i];
2335     if (c) {
2336       dst[i] += rhs[i] + 1;
2337       c = (dst[i] <= l);
2338     } else {
2339       dst[i] += rhs[i];
2340       c = (dst[i] < l);
2341     }
2342   }
2343 
2344   return c;
2345 }
2346 
2347 /// This function adds a single "word" integer, src, to the multiple
2348 /// "word" integer array, dst[]. dst[] is modified to reflect the addition and
2349 /// 1 is returned if there is a carry out, otherwise 0 is returned.
2350 /// @returns the carry of the addition.
2351 APInt::WordType APInt::tcAddPart(WordType *dst, WordType src,
2352                                  unsigned parts) {
2353   for (unsigned i = 0; i < parts; ++i) {
2354     dst[i] += src;
2355     if (dst[i] >= src)
2356       return 0; // No need to carry so exit early.
2357     src = 1; // Carry one to next digit.
2358   }
2359 
2360   return 1;
2361 }
2362 
2363 /* DST -= RHS + C where C is zero or one.  Returns the carry flag.  */
2364 APInt::WordType APInt::tcSubtract(WordType *dst, const WordType *rhs,
2365                                   WordType c, unsigned parts) {
2366   assert(c <= 1);
2367 
2368   for (unsigned i = 0; i < parts; i++) {
2369     WordType l = dst[i];
2370     if (c) {
2371       dst[i] -= rhs[i] + 1;
2372       c = (dst[i] >= l);
2373     } else {
2374       dst[i] -= rhs[i];
2375       c = (dst[i] > l);
2376     }
2377   }
2378 
2379   return c;
2380 }
2381 
2382 /// This function subtracts a single "word" (64-bit word), src, from
2383 /// the multi-word integer array, dst[], propagating the borrowed 1 value until
2384 /// no further borrowing is needed or it runs out of "words" in dst.  The result
2385 /// is 1 if "borrowing" exhausted the digits in dst, or 0 if dst was not
2386 /// exhausted. In other words, if src > dst then this function returns 1,
2387 /// otherwise 0.
2388 /// @returns the borrow out of the subtraction
2389 APInt::WordType APInt::tcSubtractPart(WordType *dst, WordType src,
2390                                       unsigned parts) {
2391   for (unsigned i = 0; i < parts; ++i) {
2392     WordType Dst = dst[i];
2393     dst[i] -= src;
2394     if (src <= Dst)
2395       return 0; // No need to borrow so exit early.
2396     src = 1; // We have to "borrow 1" from next "word"
2397   }
2398 
2399   return 1;
2400 }
2401 
2402 /* Negate a bignum in-place.  */
2403 void APInt::tcNegate(WordType *dst, unsigned parts) {
2404   tcComplement(dst, parts);
2405   tcIncrement(dst, parts);
2406 }
2407 
2408 /*  DST += SRC * MULTIPLIER + CARRY   if add is true
2409     DST  = SRC * MULTIPLIER + CARRY   if add is false
2410 
2411     Requires 0 <= DSTPARTS <= SRCPARTS + 1.  If DST overlaps SRC
2412     they must start at the same point, i.e. DST == SRC.
2413 
2414     If DSTPARTS == SRCPARTS + 1 no overflow occurs and zero is
2415     returned.  Otherwise DST is filled with the least significant
2416     DSTPARTS parts of the result, and if all of the omitted higher
2417     parts were zero return zero, otherwise overflow occurred and
2418     return one.  */
2419 int APInt::tcMultiplyPart(WordType *dst, const WordType *src,
2420                           WordType multiplier, WordType carry,
2421                           unsigned srcParts, unsigned dstParts,
2422                           bool add) {
2423   /* Otherwise our writes of DST kill our later reads of SRC.  */
2424   assert(dst <= src || dst >= src + srcParts);
2425   assert(dstParts <= srcParts + 1);
2426 
2427   /* N loops; minimum of dstParts and srcParts.  */
2428   unsigned n = dstParts < srcParts ? dstParts: srcParts;
2429 
2430   unsigned i;
2431   for (i = 0; i < n; i++) {
2432     WordType low, mid, high, srcPart;
2433 
2434       /* [ LOW, HIGH ] = MULTIPLIER * SRC[i] + DST[i] + CARRY.
2435 
2436          This cannot overflow, because
2437 
2438          (n - 1) * (n - 1) + 2 (n - 1) = (n - 1) * (n + 1)
2439 
2440          which is less than n^2.  */
2441 
2442     srcPart = src[i];
2443 
2444     if (multiplier == 0 || srcPart == 0) {
2445       low = carry;
2446       high = 0;
2447     } else {
2448       low = lowHalf(srcPart) * lowHalf(multiplier);
2449       high = highHalf(srcPart) * highHalf(multiplier);
2450 
2451       mid = lowHalf(srcPart) * highHalf(multiplier);
2452       high += highHalf(mid);
2453       mid <<= APINT_BITS_PER_WORD / 2;
2454       if (low + mid < low)
2455         high++;
2456       low += mid;
2457 
2458       mid = highHalf(srcPart) * lowHalf(multiplier);
2459       high += highHalf(mid);
2460       mid <<= APINT_BITS_PER_WORD / 2;
2461       if (low + mid < low)
2462         high++;
2463       low += mid;
2464 
2465       /* Now add carry.  */
2466       if (low + carry < low)
2467         high++;
2468       low += carry;
2469     }
2470 
2471     if (add) {
2472       /* And now DST[i], and store the new low part there.  */
2473       if (low + dst[i] < low)
2474         high++;
2475       dst[i] += low;
2476     } else
2477       dst[i] = low;
2478 
2479     carry = high;
2480   }
2481 
2482   if (i < dstParts) {
2483     /* Full multiplication, there is no overflow.  */
2484     assert(i + 1 == dstParts);
2485     dst[i] = carry;
2486     return 0;
2487   } else {
2488     /* We overflowed if there is carry.  */
2489     if (carry)
2490       return 1;
2491 
2492     /* We would overflow if any significant unwritten parts would be
2493        non-zero.  This is true if any remaining src parts are non-zero
2494        and the multiplier is non-zero.  */
2495     if (multiplier)
2496       for (; i < srcParts; i++)
2497         if (src[i])
2498           return 1;
2499 
2500     /* We fitted in the narrow destination.  */
2501     return 0;
2502   }
2503 }
2504 
2505 /* DST = LHS * RHS, where DST has the same width as the operands and
2506    is filled with the least significant parts of the result.  Returns
2507    one if overflow occurred, otherwise zero.  DST must be disjoint
2508    from both operands.  */
2509 int APInt::tcMultiply(WordType *dst, const WordType *lhs,
2510                       const WordType *rhs, unsigned parts) {
2511   assert(dst != lhs && dst != rhs);
2512 
2513   int overflow = 0;
2514   tcSet(dst, 0, parts);
2515 
2516   for (unsigned i = 0; i < parts; i++)
2517     overflow |= tcMultiplyPart(&dst[i], lhs, rhs[i], 0, parts,
2518                                parts - i, true);
2519 
2520   return overflow;
2521 }
2522 
2523 /* DST = LHS * RHS, where DST has width the sum of the widths of the
2524    operands.  No overflow occurs.  DST must be disjoint from both
2525    operands.  Returns the number of parts required to hold the
2526    result.  */
2527 unsigned APInt::tcFullMultiply(WordType *dst, const WordType *lhs,
2528                                const WordType *rhs, unsigned lhsParts,
2529                                unsigned rhsParts) {
2530   /* Put the narrower number on the LHS for less loops below.  */
2531   if (lhsParts > rhsParts) {
2532     return tcFullMultiply (dst, rhs, lhs, rhsParts, lhsParts);
2533   } else {
2534     assert(dst != lhs && dst != rhs);
2535 
2536     tcSet(dst, 0, rhsParts);
2537 
2538     for (unsigned i = 0; i < lhsParts; i++)
2539       tcMultiplyPart(&dst[i], rhs, lhs[i], 0, rhsParts, rhsParts + 1, true);
2540 
2541     unsigned n = lhsParts + rhsParts;
2542 
2543     return n - (dst[n - 1] == 0);
2544   }
2545 }
2546 
2547 /* If RHS is zero LHS and REMAINDER are left unchanged, return one.
2548    Otherwise set LHS to LHS / RHS with the fractional part discarded,
2549    set REMAINDER to the remainder, return zero.  i.e.
2550 
2551    OLD_LHS = RHS * LHS + REMAINDER
2552 
2553    SCRATCH is a bignum of the same size as the operands and result for
2554    use by the routine; its contents need not be initialized and are
2555    destroyed.  LHS, REMAINDER and SCRATCH must be distinct.
2556 */
2557 int APInt::tcDivide(WordType *lhs, const WordType *rhs,
2558                     WordType *remainder, WordType *srhs,
2559                     unsigned parts) {
2560   assert(lhs != remainder && lhs != srhs && remainder != srhs);
2561 
2562   unsigned shiftCount = tcMSB(rhs, parts) + 1;
2563   if (shiftCount == 0)
2564     return true;
2565 
2566   shiftCount = parts * APINT_BITS_PER_WORD - shiftCount;
2567   unsigned n = shiftCount / APINT_BITS_PER_WORD;
2568   WordType mask = (WordType) 1 << (shiftCount % APINT_BITS_PER_WORD);
2569 
2570   tcAssign(srhs, rhs, parts);
2571   tcShiftLeft(srhs, parts, shiftCount);
2572   tcAssign(remainder, lhs, parts);
2573   tcSet(lhs, 0, parts);
2574 
2575   /* Loop, subtracting SRHS if REMAINDER is greater and adding that to
2576      the total.  */
2577   for (;;) {
2578       int compare;
2579 
2580       compare = tcCompare(remainder, srhs, parts);
2581       if (compare >= 0) {
2582         tcSubtract(remainder, srhs, 0, parts);
2583         lhs[n] |= mask;
2584       }
2585 
2586       if (shiftCount == 0)
2587         break;
2588       shiftCount--;
2589       tcShiftRight(srhs, parts, 1);
2590       if ((mask >>= 1) == 0) {
2591         mask = (WordType) 1 << (APINT_BITS_PER_WORD - 1);
2592         n--;
2593       }
2594   }
2595 
2596   return false;
2597 }
2598 
2599 /// Shift a bignum left Cound bits in-place. Shifted in bits are zero. There are
2600 /// no restrictions on Count.
2601 void APInt::tcShiftLeft(WordType *Dst, unsigned Words, unsigned Count) {
2602   // Don't bother performing a no-op shift.
2603   if (!Count)
2604     return;
2605 
2606   /* WordShift is the inter-part shift; BitShift is is intra-part shift.  */
2607   unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words);
2608   unsigned BitShift = Count % APINT_BITS_PER_WORD;
2609 
2610   // Fastpath for moving by whole words.
2611   if (BitShift == 0) {
2612     std::memmove(Dst + WordShift, Dst, (Words - WordShift) * APINT_WORD_SIZE);
2613   } else {
2614     while (Words-- > WordShift) {
2615       Dst[Words] = Dst[Words - WordShift] << BitShift;
2616       if (Words > WordShift)
2617         Dst[Words] |=
2618           Dst[Words - WordShift - 1] >> (APINT_BITS_PER_WORD - BitShift);
2619     }
2620   }
2621 
2622   // Fill in the remainder with 0s.
2623   std::memset(Dst, 0, WordShift * APINT_WORD_SIZE);
2624 }
2625 
2626 /// Shift a bignum right Count bits in-place. Shifted in bits are zero. There
2627 /// are no restrictions on Count.
2628 void APInt::tcShiftRight(WordType *Dst, unsigned Words, unsigned Count) {
2629   // Don't bother performing a no-op shift.
2630   if (!Count)
2631     return;
2632 
2633   // WordShift is the inter-part shift; BitShift is is intra-part shift.
2634   unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words);
2635   unsigned BitShift = Count % APINT_BITS_PER_WORD;
2636 
2637   unsigned WordsToMove = Words - WordShift;
2638   // Fastpath for moving by whole words.
2639   if (BitShift == 0) {
2640     std::memmove(Dst, Dst + WordShift, WordsToMove * APINT_WORD_SIZE);
2641   } else {
2642     for (unsigned i = 0; i != WordsToMove; ++i) {
2643       Dst[i] = Dst[i + WordShift] >> BitShift;
2644       if (i + 1 != WordsToMove)
2645         Dst[i] |= Dst[i + WordShift + 1] << (APINT_BITS_PER_WORD - BitShift);
2646     }
2647   }
2648 
2649   // Fill in the remainder with 0s.
2650   std::memset(Dst + WordsToMove, 0, WordShift * APINT_WORD_SIZE);
2651 }
2652 
2653 /* Bitwise and of two bignums.  */
2654 void APInt::tcAnd(WordType *dst, const WordType *rhs, unsigned parts) {
2655   for (unsigned i = 0; i < parts; i++)
2656     dst[i] &= rhs[i];
2657 }
2658 
2659 /* Bitwise inclusive or of two bignums.  */
2660 void APInt::tcOr(WordType *dst, const WordType *rhs, unsigned parts) {
2661   for (unsigned i = 0; i < parts; i++)
2662     dst[i] |= rhs[i];
2663 }
2664 
2665 /* Bitwise exclusive or of two bignums.  */
2666 void APInt::tcXor(WordType *dst, const WordType *rhs, unsigned parts) {
2667   for (unsigned i = 0; i < parts; i++)
2668     dst[i] ^= rhs[i];
2669 }
2670 
2671 /* Complement a bignum in-place.  */
2672 void APInt::tcComplement(WordType *dst, unsigned parts) {
2673   for (unsigned i = 0; i < parts; i++)
2674     dst[i] = ~dst[i];
2675 }
2676 
2677 /* Comparison (unsigned) of two bignums.  */
2678 int APInt::tcCompare(const WordType *lhs, const WordType *rhs,
2679                      unsigned parts) {
2680   while (parts) {
2681     parts--;
2682     if (lhs[parts] == rhs[parts])
2683       continue;
2684 
2685     return (lhs[parts] > rhs[parts]) ? 1 : -1;
2686   }
2687 
2688   return 0;
2689 }
2690 
2691 /* Set the least significant BITS bits of a bignum, clear the
2692    rest.  */
2693 void APInt::tcSetLeastSignificantBits(WordType *dst, unsigned parts,
2694                                       unsigned bits) {
2695   unsigned i = 0;
2696   while (bits > APINT_BITS_PER_WORD) {
2697     dst[i++] = ~(WordType) 0;
2698     bits -= APINT_BITS_PER_WORD;
2699   }
2700 
2701   if (bits)
2702     dst[i++] = ~(WordType) 0 >> (APINT_BITS_PER_WORD - bits);
2703 
2704   while (i < parts)
2705     dst[i++] = 0;
2706 }
2707