1 //===-- APInt.cpp - Implement APInt class ---------------------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements a class to represent arbitrary precision integer
11 // constant values and provide a variety of arithmetic operations on them.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #define DEBUG_TYPE "apint"
16 #include "llvm/ADT/APInt.h"
17 #include "llvm/ADT/StringRef.h"
18 #include "llvm/ADT/FoldingSet.h"
19 #include "llvm/ADT/SmallString.h"
20 #include "llvm/Support/Debug.h"
21 #include "llvm/Support/ErrorHandling.h"
22 #include "llvm/Support/MathExtras.h"
23 #include "llvm/Support/raw_ostream.h"
24 #include <cmath>
25 #include <limits>
26 #include <cstring>
27 #include <cstdlib>
28 using namespace llvm;
29 
30 /// A utility function for allocating memory, checking for allocation failures,
31 /// and ensuring the contents are zeroed.
32 inline static uint64_t* getClearedMemory(unsigned numWords) {
33   uint64_t * result = new uint64_t[numWords];
34   assert(result && "APInt memory allocation fails!");
35   memset(result, 0, numWords * sizeof(uint64_t));
36   return result;
37 }
38 
39 /// A utility function for allocating memory and checking for allocation
40 /// failure.  The content is not zeroed.
41 inline static uint64_t* getMemory(unsigned numWords) {
42   uint64_t * result = new uint64_t[numWords];
43   assert(result && "APInt memory allocation fails!");
44   return result;
45 }
46 
47 /// A utility function that converts a character to a digit.
48 inline static unsigned getDigit(char cdigit, uint8_t radix) {
49   unsigned r;
50 
51   if (radix == 16) {
52     r = cdigit - '0';
53     if (r <= 9)
54       return r;
55 
56     r = cdigit - 'A';
57     if (r <= 5)
58       return r + 10;
59 
60     r = cdigit - 'a';
61     if (r <= 5)
62       return r + 10;
63   }
64 
65   r = cdigit - '0';
66   if (r < radix)
67     return r;
68 
69   return -1U;
70 }
71 
72 
73 void APInt::initSlowCase(unsigned numBits, uint64_t val, bool isSigned) {
74   pVal = getClearedMemory(getNumWords());
75   pVal[0] = val;
76   if (isSigned && int64_t(val) < 0)
77     for (unsigned i = 1; i < getNumWords(); ++i)
78       pVal[i] = -1ULL;
79 }
80 
81 void APInt::initSlowCase(const APInt& that) {
82   pVal = getMemory(getNumWords());
83   memcpy(pVal, that.pVal, getNumWords() * APINT_WORD_SIZE);
84 }
85 
86 
87 APInt::APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[])
88   : BitWidth(numBits), VAL(0) {
89   assert(BitWidth && "Bitwidth too small");
90   assert(bigVal && "Null pointer detected!");
91   if (isSingleWord())
92     VAL = bigVal[0];
93   else {
94     // Get memory, cleared to 0
95     pVal = getClearedMemory(getNumWords());
96     // Calculate the number of words to copy
97     unsigned words = std::min<unsigned>(numWords, getNumWords());
98     // Copy the words from bigVal to pVal
99     memcpy(pVal, bigVal, words * APINT_WORD_SIZE);
100   }
101   // Make sure unused high bits are cleared
102   clearUnusedBits();
103 }
104 
105 APInt::APInt(unsigned numbits, StringRef Str, uint8_t radix)
106   : BitWidth(numbits), VAL(0) {
107   assert(BitWidth && "Bitwidth too small");
108   fromString(numbits, Str, radix);
109 }
110 
111 APInt& APInt::AssignSlowCase(const APInt& RHS) {
112   // Don't do anything for X = X
113   if (this == &RHS)
114     return *this;
115 
116   if (BitWidth == RHS.getBitWidth()) {
117     // assume same bit-width single-word case is already handled
118     assert(!isSingleWord());
119     memcpy(pVal, RHS.pVal, getNumWords() * APINT_WORD_SIZE);
120     return *this;
121   }
122 
123   if (isSingleWord()) {
124     // assume case where both are single words is already handled
125     assert(!RHS.isSingleWord());
126     VAL = 0;
127     pVal = getMemory(RHS.getNumWords());
128     memcpy(pVal, RHS.pVal, RHS.getNumWords() * APINT_WORD_SIZE);
129   } else if (getNumWords() == RHS.getNumWords())
130     memcpy(pVal, RHS.pVal, RHS.getNumWords() * APINT_WORD_SIZE);
131   else if (RHS.isSingleWord()) {
132     delete [] pVal;
133     VAL = RHS.VAL;
134   } else {
135     delete [] pVal;
136     pVal = getMemory(RHS.getNumWords());
137     memcpy(pVal, RHS.pVal, RHS.getNumWords() * APINT_WORD_SIZE);
138   }
139   BitWidth = RHS.BitWidth;
140   return clearUnusedBits();
141 }
142 
143 APInt& APInt::operator=(uint64_t RHS) {
144   if (isSingleWord())
145     VAL = RHS;
146   else {
147     pVal[0] = RHS;
148     memset(pVal+1, 0, (getNumWords() - 1) * APINT_WORD_SIZE);
149   }
150   return clearUnusedBits();
151 }
152 
153 /// Profile - This method 'profiles' an APInt for use with FoldingSet.
154 void APInt::Profile(FoldingSetNodeID& ID) const {
155   ID.AddInteger(BitWidth);
156 
157   if (isSingleWord()) {
158     ID.AddInteger(VAL);
159     return;
160   }
161 
162   unsigned NumWords = getNumWords();
163   for (unsigned i = 0; i < NumWords; ++i)
164     ID.AddInteger(pVal[i]);
165 }
166 
167 /// add_1 - This function adds a single "digit" integer, y, to the multiple
168 /// "digit" integer array,  x[]. x[] is modified to reflect the addition and
169 /// 1 is returned if there is a carry out, otherwise 0 is returned.
170 /// @returns the carry of the addition.
171 static bool add_1(uint64_t dest[], uint64_t x[], unsigned len, uint64_t y) {
172   for (unsigned i = 0; i < len; ++i) {
173     dest[i] = y + x[i];
174     if (dest[i] < y)
175       y = 1; // Carry one to next digit.
176     else {
177       y = 0; // No need to carry so exit early
178       break;
179     }
180   }
181   return y;
182 }
183 
184 /// @brief Prefix increment operator. Increments the APInt by one.
185 APInt& APInt::operator++() {
186   if (isSingleWord())
187     ++VAL;
188   else
189     add_1(pVal, pVal, getNumWords(), 1);
190   return clearUnusedBits();
191 }
192 
193 /// sub_1 - This function subtracts a single "digit" (64-bit word), y, from
194 /// the multi-digit integer array, x[], propagating the borrowed 1 value until
195 /// no further borrowing is neeeded or it runs out of "digits" in x.  The result
196 /// is 1 if "borrowing" exhausted the digits in x, or 0 if x was not exhausted.
197 /// In other words, if y > x then this function returns 1, otherwise 0.
198 /// @returns the borrow out of the subtraction
199 static bool sub_1(uint64_t x[], unsigned len, uint64_t y) {
200   for (unsigned i = 0; i < len; ++i) {
201     uint64_t X = x[i];
202     x[i] -= y;
203     if (y > X)
204       y = 1;  // We have to "borrow 1" from next "digit"
205     else {
206       y = 0;  // No need to borrow
207       break;  // Remaining digits are unchanged so exit early
208     }
209   }
210   return bool(y);
211 }
212 
213 /// @brief Prefix decrement operator. Decrements the APInt by one.
214 APInt& APInt::operator--() {
215   if (isSingleWord())
216     --VAL;
217   else
218     sub_1(pVal, getNumWords(), 1);
219   return clearUnusedBits();
220 }
221 
222 /// add - This function adds the integer array x to the integer array Y and
223 /// places the result in dest.
224 /// @returns the carry out from the addition
225 /// @brief General addition of 64-bit integer arrays
226 static bool add(uint64_t *dest, const uint64_t *x, const uint64_t *y,
227                 unsigned len) {
228   bool carry = false;
229   for (unsigned i = 0; i< len; ++i) {
230     uint64_t limit = std::min(x[i],y[i]); // must come first in case dest == x
231     dest[i] = x[i] + y[i] + carry;
232     carry = dest[i] < limit || (carry && dest[i] == limit);
233   }
234   return carry;
235 }
236 
237 /// Adds the RHS APint to this APInt.
238 /// @returns this, after addition of RHS.
239 /// @brief Addition assignment operator.
240 APInt& APInt::operator+=(const APInt& RHS) {
241   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
242   if (isSingleWord())
243     VAL += RHS.VAL;
244   else {
245     add(pVal, pVal, RHS.pVal, getNumWords());
246   }
247   return clearUnusedBits();
248 }
249 
250 /// Subtracts the integer array y from the integer array x
251 /// @returns returns the borrow out.
252 /// @brief Generalized subtraction of 64-bit integer arrays.
253 static bool sub(uint64_t *dest, const uint64_t *x, const uint64_t *y,
254                 unsigned len) {
255   bool borrow = false;
256   for (unsigned i = 0; i < len; ++i) {
257     uint64_t x_tmp = borrow ? x[i] - 1 : x[i];
258     borrow = y[i] > x_tmp || (borrow && x[i] == 0);
259     dest[i] = x_tmp - y[i];
260   }
261   return borrow;
262 }
263 
264 /// Subtracts the RHS APInt from this APInt
265 /// @returns this, after subtraction
266 /// @brief Subtraction assignment operator.
267 APInt& APInt::operator-=(const APInt& RHS) {
268   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
269   if (isSingleWord())
270     VAL -= RHS.VAL;
271   else
272     sub(pVal, pVal, RHS.pVal, getNumWords());
273   return clearUnusedBits();
274 }
275 
276 /// Multiplies an integer array, x, by a uint64_t integer and places the result
277 /// into dest.
278 /// @returns the carry out of the multiplication.
279 /// @brief Multiply a multi-digit APInt by a single digit (64-bit) integer.
280 static uint64_t mul_1(uint64_t dest[], uint64_t x[], unsigned len, uint64_t y) {
281   // Split y into high 32-bit part (hy)  and low 32-bit part (ly)
282   uint64_t ly = y & 0xffffffffULL, hy = y >> 32;
283   uint64_t carry = 0;
284 
285   // For each digit of x.
286   for (unsigned i = 0; i < len; ++i) {
287     // Split x into high and low words
288     uint64_t lx = x[i] & 0xffffffffULL;
289     uint64_t hx = x[i] >> 32;
290     // hasCarry - A flag to indicate if there is a carry to the next digit.
291     // hasCarry == 0, no carry
292     // hasCarry == 1, has carry
293     // hasCarry == 2, no carry and the calculation result == 0.
294     uint8_t hasCarry = 0;
295     dest[i] = carry + lx * ly;
296     // Determine if the add above introduces carry.
297     hasCarry = (dest[i] < carry) ? 1 : 0;
298     carry = hx * ly + (dest[i] >> 32) + (hasCarry ? (1ULL << 32) : 0);
299     // The upper limit of carry can be (2^32 - 1)(2^32 - 1) +
300     // (2^32 - 1) + 2^32 = 2^64.
301     hasCarry = (!carry && hasCarry) ? 1 : (!carry ? 2 : 0);
302 
303     carry += (lx * hy) & 0xffffffffULL;
304     dest[i] = (carry << 32) | (dest[i] & 0xffffffffULL);
305     carry = (((!carry && hasCarry != 2) || hasCarry == 1) ? (1ULL << 32) : 0) +
306             (carry >> 32) + ((lx * hy) >> 32) + hx * hy;
307   }
308   return carry;
309 }
310 
311 /// Multiplies integer array x by integer array y and stores the result into
312 /// the integer array dest. Note that dest's size must be >= xlen + ylen.
313 /// @brief Generalized multiplicate of integer arrays.
314 static void mul(uint64_t dest[], uint64_t x[], unsigned xlen, uint64_t y[],
315                 unsigned ylen) {
316   dest[xlen] = mul_1(dest, x, xlen, y[0]);
317   for (unsigned i = 1; i < ylen; ++i) {
318     uint64_t ly = y[i] & 0xffffffffULL, hy = y[i] >> 32;
319     uint64_t carry = 0, lx = 0, hx = 0;
320     for (unsigned j = 0; j < xlen; ++j) {
321       lx = x[j] & 0xffffffffULL;
322       hx = x[j] >> 32;
323       // hasCarry - A flag to indicate if has carry.
324       // hasCarry == 0, no carry
325       // hasCarry == 1, has carry
326       // hasCarry == 2, no carry and the calculation result == 0.
327       uint8_t hasCarry = 0;
328       uint64_t resul = carry + lx * ly;
329       hasCarry = (resul < carry) ? 1 : 0;
330       carry = (hasCarry ? (1ULL << 32) : 0) + hx * ly + (resul >> 32);
331       hasCarry = (!carry && hasCarry) ? 1 : (!carry ? 2 : 0);
332 
333       carry += (lx * hy) & 0xffffffffULL;
334       resul = (carry << 32) | (resul & 0xffffffffULL);
335       dest[i+j] += resul;
336       carry = (((!carry && hasCarry != 2) || hasCarry == 1) ? (1ULL << 32) : 0)+
337               (carry >> 32) + (dest[i+j] < resul ? 1 : 0) +
338               ((lx * hy) >> 32) + hx * hy;
339     }
340     dest[i+xlen] = carry;
341   }
342 }
343 
344 APInt& APInt::operator*=(const APInt& RHS) {
345   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
346   if (isSingleWord()) {
347     VAL *= RHS.VAL;
348     clearUnusedBits();
349     return *this;
350   }
351 
352   // Get some bit facts about LHS and check for zero
353   unsigned lhsBits = getActiveBits();
354   unsigned lhsWords = !lhsBits ? 0 : whichWord(lhsBits - 1) + 1;
355   if (!lhsWords)
356     // 0 * X ===> 0
357     return *this;
358 
359   // Get some bit facts about RHS and check for zero
360   unsigned rhsBits = RHS.getActiveBits();
361   unsigned rhsWords = !rhsBits ? 0 : whichWord(rhsBits - 1) + 1;
362   if (!rhsWords) {
363     // X * 0 ===> 0
364     clearAllBits();
365     return *this;
366   }
367 
368   // Allocate space for the result
369   unsigned destWords = rhsWords + lhsWords;
370   uint64_t *dest = getMemory(destWords);
371 
372   // Perform the long multiply
373   mul(dest, pVal, lhsWords, RHS.pVal, rhsWords);
374 
375   // Copy result back into *this
376   clearAllBits();
377   unsigned wordsToCopy = destWords >= getNumWords() ? getNumWords() : destWords;
378   memcpy(pVal, dest, wordsToCopy * APINT_WORD_SIZE);
379 
380   // delete dest array and return
381   delete[] dest;
382   return *this;
383 }
384 
385 APInt& APInt::operator&=(const APInt& RHS) {
386   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
387   if (isSingleWord()) {
388     VAL &= RHS.VAL;
389     return *this;
390   }
391   unsigned numWords = getNumWords();
392   for (unsigned i = 0; i < numWords; ++i)
393     pVal[i] &= RHS.pVal[i];
394   return *this;
395 }
396 
397 APInt& APInt::operator|=(const APInt& RHS) {
398   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
399   if (isSingleWord()) {
400     VAL |= RHS.VAL;
401     return *this;
402   }
403   unsigned numWords = getNumWords();
404   for (unsigned i = 0; i < numWords; ++i)
405     pVal[i] |= RHS.pVal[i];
406   return *this;
407 }
408 
409 APInt& APInt::operator^=(const APInt& RHS) {
410   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
411   if (isSingleWord()) {
412     VAL ^= RHS.VAL;
413     this->clearUnusedBits();
414     return *this;
415   }
416   unsigned numWords = getNumWords();
417   for (unsigned i = 0; i < numWords; ++i)
418     pVal[i] ^= RHS.pVal[i];
419   return clearUnusedBits();
420 }
421 
422 APInt APInt::AndSlowCase(const APInt& RHS) const {
423   unsigned numWords = getNumWords();
424   uint64_t* val = getMemory(numWords);
425   for (unsigned i = 0; i < numWords; ++i)
426     val[i] = pVal[i] & RHS.pVal[i];
427   return APInt(val, getBitWidth());
428 }
429 
430 APInt APInt::OrSlowCase(const APInt& RHS) const {
431   unsigned numWords = getNumWords();
432   uint64_t *val = getMemory(numWords);
433   for (unsigned i = 0; i < numWords; ++i)
434     val[i] = pVal[i] | RHS.pVal[i];
435   return APInt(val, getBitWidth());
436 }
437 
438 APInt APInt::XorSlowCase(const APInt& RHS) const {
439   unsigned numWords = getNumWords();
440   uint64_t *val = getMemory(numWords);
441   for (unsigned i = 0; i < numWords; ++i)
442     val[i] = pVal[i] ^ RHS.pVal[i];
443 
444   // 0^0==1 so clear the high bits in case they got set.
445   return APInt(val, getBitWidth()).clearUnusedBits();
446 }
447 
448 bool APInt::operator !() const {
449   if (isSingleWord())
450     return !VAL;
451 
452   for (unsigned i = 0; i < getNumWords(); ++i)
453     if (pVal[i])
454       return false;
455   return true;
456 }
457 
458 APInt APInt::operator*(const APInt& RHS) const {
459   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
460   if (isSingleWord())
461     return APInt(BitWidth, VAL * RHS.VAL);
462   APInt Result(*this);
463   Result *= RHS;
464   return Result.clearUnusedBits();
465 }
466 
467 APInt APInt::operator+(const APInt& RHS) const {
468   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
469   if (isSingleWord())
470     return APInt(BitWidth, VAL + RHS.VAL);
471   APInt Result(BitWidth, 0);
472   add(Result.pVal, this->pVal, RHS.pVal, getNumWords());
473   return Result.clearUnusedBits();
474 }
475 
476 APInt APInt::operator-(const APInt& RHS) const {
477   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
478   if (isSingleWord())
479     return APInt(BitWidth, VAL - RHS.VAL);
480   APInt Result(BitWidth, 0);
481   sub(Result.pVal, this->pVal, RHS.pVal, getNumWords());
482   return Result.clearUnusedBits();
483 }
484 
485 bool APInt::operator[](unsigned bitPosition) const {
486   assert(bitPosition < getBitWidth() && "Bit position out of bounds!");
487   return (maskBit(bitPosition) &
488           (isSingleWord() ?  VAL : pVal[whichWord(bitPosition)])) != 0;
489 }
490 
491 bool APInt::EqualSlowCase(const APInt& RHS) const {
492   // Get some facts about the number of bits used in the two operands.
493   unsigned n1 = getActiveBits();
494   unsigned n2 = RHS.getActiveBits();
495 
496   // If the number of bits isn't the same, they aren't equal
497   if (n1 != n2)
498     return false;
499 
500   // If the number of bits fits in a word, we only need to compare the low word.
501   if (n1 <= APINT_BITS_PER_WORD)
502     return pVal[0] == RHS.pVal[0];
503 
504   // Otherwise, compare everything
505   for (int i = whichWord(n1 - 1); i >= 0; --i)
506     if (pVal[i] != RHS.pVal[i])
507       return false;
508   return true;
509 }
510 
511 bool APInt::EqualSlowCase(uint64_t Val) const {
512   unsigned n = getActiveBits();
513   if (n <= APINT_BITS_PER_WORD)
514     return pVal[0] == Val;
515   else
516     return false;
517 }
518 
519 bool APInt::ult(const APInt& RHS) const {
520   assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
521   if (isSingleWord())
522     return VAL < RHS.VAL;
523 
524   // Get active bit length of both operands
525   unsigned n1 = getActiveBits();
526   unsigned n2 = RHS.getActiveBits();
527 
528   // If magnitude of LHS is less than RHS, return true.
529   if (n1 < n2)
530     return true;
531 
532   // If magnitude of RHS is greather than LHS, return false.
533   if (n2 < n1)
534     return false;
535 
536   // If they bot fit in a word, just compare the low order word
537   if (n1 <= APINT_BITS_PER_WORD && n2 <= APINT_BITS_PER_WORD)
538     return pVal[0] < RHS.pVal[0];
539 
540   // Otherwise, compare all words
541   unsigned topWord = whichWord(std::max(n1,n2)-1);
542   for (int i = topWord; i >= 0; --i) {
543     if (pVal[i] > RHS.pVal[i])
544       return false;
545     if (pVal[i] < RHS.pVal[i])
546       return true;
547   }
548   return false;
549 }
550 
551 bool APInt::slt(const APInt& RHS) const {
552   assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
553   if (isSingleWord()) {
554     int64_t lhsSext = (int64_t(VAL) << (64-BitWidth)) >> (64-BitWidth);
555     int64_t rhsSext = (int64_t(RHS.VAL) << (64-BitWidth)) >> (64-BitWidth);
556     return lhsSext < rhsSext;
557   }
558 
559   APInt lhs(*this);
560   APInt rhs(RHS);
561   bool lhsNeg = isNegative();
562   bool rhsNeg = rhs.isNegative();
563   if (lhsNeg) {
564     // Sign bit is set so perform two's complement to make it positive
565     lhs.flipAllBits();
566     lhs++;
567   }
568   if (rhsNeg) {
569     // Sign bit is set so perform two's complement to make it positive
570     rhs.flipAllBits();
571     rhs++;
572   }
573 
574   // Now we have unsigned values to compare so do the comparison if necessary
575   // based on the negativeness of the values.
576   if (lhsNeg)
577     if (rhsNeg)
578       return lhs.ugt(rhs);
579     else
580       return true;
581   else if (rhsNeg)
582     return false;
583   else
584     return lhs.ult(rhs);
585 }
586 
587 void APInt::setBit(unsigned bitPosition) {
588   if (isSingleWord())
589     VAL |= maskBit(bitPosition);
590   else
591     pVal[whichWord(bitPosition)] |= maskBit(bitPosition);
592 }
593 
594 /// Set the given bit to 0 whose position is given as "bitPosition".
595 /// @brief Set a given bit to 0.
596 void APInt::clearBit(unsigned bitPosition) {
597   if (isSingleWord())
598     VAL &= ~maskBit(bitPosition);
599   else
600     pVal[whichWord(bitPosition)] &= ~maskBit(bitPosition);
601 }
602 
603 /// @brief Toggle every bit to its opposite value.
604 
605 /// Toggle a given bit to its opposite value whose position is given
606 /// as "bitPosition".
607 /// @brief Toggles a given bit to its opposite value.
608 void APInt::flipBit(unsigned bitPosition) {
609   assert(bitPosition < BitWidth && "Out of the bit-width range!");
610   if ((*this)[bitPosition]) clearBit(bitPosition);
611   else setBit(bitPosition);
612 }
613 
614 unsigned APInt::getBitsNeeded(StringRef str, uint8_t radix) {
615   assert(!str.empty() && "Invalid string length");
616   assert((radix == 10 || radix == 8 || radix == 16 || radix == 2) &&
617          "Radix should be 2, 8, 10, or 16!");
618 
619   size_t slen = str.size();
620 
621   // Each computation below needs to know if it's negative.
622   StringRef::iterator p = str.begin();
623   unsigned isNegative = *p == '-';
624   if (*p == '-' || *p == '+') {
625     p++;
626     slen--;
627     assert(slen && "String is only a sign, needs a value.");
628   }
629 
630   // For radixes of power-of-two values, the bits required is accurately and
631   // easily computed
632   if (radix == 2)
633     return slen + isNegative;
634   if (radix == 8)
635     return slen * 3 + isNegative;
636   if (radix == 16)
637     return slen * 4 + isNegative;
638 
639   // This is grossly inefficient but accurate. We could probably do something
640   // with a computation of roughly slen*64/20 and then adjust by the value of
641   // the first few digits. But, I'm not sure how accurate that could be.
642 
643   // Compute a sufficient number of bits that is always large enough but might
644   // be too large. This avoids the assertion in the constructor. This
645   // calculation doesn't work appropriately for the numbers 0-9, so just use 4
646   // bits in that case.
647   unsigned sufficient = slen == 1 ? 4 : slen * 64/18;
648 
649   // Convert to the actual binary value.
650   APInt tmp(sufficient, StringRef(p, slen), radix);
651 
652   // Compute how many bits are required. If the log is infinite, assume we need
653   // just bit.
654   unsigned log = tmp.logBase2();
655   if (log == (unsigned)-1) {
656     return isNegative + 1;
657   } else {
658     return isNegative + log + 1;
659   }
660 }
661 
662 // From http://www.burtleburtle.net, byBob Jenkins.
663 // When targeting x86, both GCC and LLVM seem to recognize this as a
664 // rotate instruction.
665 #define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
666 
667 // From http://www.burtleburtle.net, by Bob Jenkins.
668 #define mix(a,b,c) \
669   { \
670     a -= c;  a ^= rot(c, 4);  c += b; \
671     b -= a;  b ^= rot(a, 6);  a += c; \
672     c -= b;  c ^= rot(b, 8);  b += a; \
673     a -= c;  a ^= rot(c,16);  c += b; \
674     b -= a;  b ^= rot(a,19);  a += c; \
675     c -= b;  c ^= rot(b, 4);  b += a; \
676   }
677 
678 // From http://www.burtleburtle.net, by Bob Jenkins.
679 #define final(a,b,c) \
680   { \
681     c ^= b; c -= rot(b,14); \
682     a ^= c; a -= rot(c,11); \
683     b ^= a; b -= rot(a,25); \
684     c ^= b; c -= rot(b,16); \
685     a ^= c; a -= rot(c,4);  \
686     b ^= a; b -= rot(a,14); \
687     c ^= b; c -= rot(b,24); \
688   }
689 
690 // hashword() was adapted from http://www.burtleburtle.net, by Bob
691 // Jenkins.  k is a pointer to an array of uint32_t values; length is
692 // the length of the key, in 32-bit chunks.  This version only handles
693 // keys that are a multiple of 32 bits in size.
694 static inline uint32_t hashword(const uint64_t *k64, size_t length)
695 {
696   const uint32_t *k = reinterpret_cast<const uint32_t *>(k64);
697   uint32_t a,b,c;
698 
699   /* Set up the internal state */
700   a = b = c = 0xdeadbeef + (((uint32_t)length)<<2);
701 
702   /*------------------------------------------------- handle most of the key */
703   while (length > 3) {
704     a += k[0];
705     b += k[1];
706     c += k[2];
707     mix(a,b,c);
708     length -= 3;
709     k += 3;
710   }
711 
712   /*------------------------------------------- handle the last 3 uint32_t's */
713   switch (length) {                  /* all the case statements fall through */
714   case 3 : c+=k[2];
715   case 2 : b+=k[1];
716   case 1 : a+=k[0];
717     final(a,b,c);
718     case 0:     /* case 0: nothing left to add */
719       break;
720     }
721   /*------------------------------------------------------ report the result */
722   return c;
723 }
724 
725 // hashword8() was adapted from http://www.burtleburtle.net, by Bob
726 // Jenkins.  This computes a 32-bit hash from one 64-bit word.  When
727 // targeting x86 (32 or 64 bit), both LLVM and GCC compile this
728 // function into about 35 instructions when inlined.
729 static inline uint32_t hashword8(const uint64_t k64)
730 {
731   uint32_t a,b,c;
732   a = b = c = 0xdeadbeef + 4;
733   b += k64 >> 32;
734   a += k64 & 0xffffffff;
735   final(a,b,c);
736   return c;
737 }
738 #undef final
739 #undef mix
740 #undef rot
741 
742 uint64_t APInt::getHashValue() const {
743   uint64_t hash;
744   if (isSingleWord())
745     hash = hashword8(VAL);
746   else
747     hash = hashword(pVal, getNumWords()*2);
748   return hash;
749 }
750 
751 /// HiBits - This function returns the high "numBits" bits of this APInt.
752 APInt APInt::getHiBits(unsigned numBits) const {
753   return APIntOps::lshr(*this, BitWidth - numBits);
754 }
755 
756 /// LoBits - This function returns the low "numBits" bits of this APInt.
757 APInt APInt::getLoBits(unsigned numBits) const {
758   return APIntOps::lshr(APIntOps::shl(*this, BitWidth - numBits),
759                         BitWidth - numBits);
760 }
761 
762 unsigned APInt::countLeadingZerosSlowCase() const {
763   // Treat the most significand word differently because it might have
764   // meaningless bits set beyond the precision.
765   unsigned BitsInMSW = BitWidth % APINT_BITS_PER_WORD;
766   integerPart MSWMask;
767   if (BitsInMSW) MSWMask = (integerPart(1) << BitsInMSW) - 1;
768   else {
769     MSWMask = ~integerPart(0);
770     BitsInMSW = APINT_BITS_PER_WORD;
771   }
772 
773   unsigned i = getNumWords();
774   integerPart MSW = pVal[i-1] & MSWMask;
775   if (MSW)
776     return CountLeadingZeros_64(MSW) - (APINT_BITS_PER_WORD - BitsInMSW);
777 
778   unsigned Count = BitsInMSW;
779   for (--i; i > 0u; --i) {
780     if (pVal[i-1] == 0)
781       Count += APINT_BITS_PER_WORD;
782     else {
783       Count += CountLeadingZeros_64(pVal[i-1]);
784       break;
785     }
786   }
787   return Count;
788 }
789 
790 static unsigned countLeadingOnes_64(uint64_t V, unsigned skip) {
791   unsigned Count = 0;
792   if (skip)
793     V <<= skip;
794   while (V && (V & (1ULL << 63))) {
795     Count++;
796     V <<= 1;
797   }
798   return Count;
799 }
800 
801 unsigned APInt::countLeadingOnes() const {
802   if (isSingleWord())
803     return countLeadingOnes_64(VAL, APINT_BITS_PER_WORD - BitWidth);
804 
805   unsigned highWordBits = BitWidth % APINT_BITS_PER_WORD;
806   unsigned shift;
807   if (!highWordBits) {
808     highWordBits = APINT_BITS_PER_WORD;
809     shift = 0;
810   } else {
811     shift = APINT_BITS_PER_WORD - highWordBits;
812   }
813   int i = getNumWords() - 1;
814   unsigned Count = countLeadingOnes_64(pVal[i], shift);
815   if (Count == highWordBits) {
816     for (i--; i >= 0; --i) {
817       if (pVal[i] == -1ULL)
818         Count += APINT_BITS_PER_WORD;
819       else {
820         Count += countLeadingOnes_64(pVal[i], 0);
821         break;
822       }
823     }
824   }
825   return Count;
826 }
827 
828 unsigned APInt::countTrailingZeros() const {
829   if (isSingleWord())
830     return std::min(unsigned(CountTrailingZeros_64(VAL)), BitWidth);
831   unsigned Count = 0;
832   unsigned i = 0;
833   for (; i < getNumWords() && pVal[i] == 0; ++i)
834     Count += APINT_BITS_PER_WORD;
835   if (i < getNumWords())
836     Count += CountTrailingZeros_64(pVal[i]);
837   return std::min(Count, BitWidth);
838 }
839 
840 unsigned APInt::countTrailingOnesSlowCase() const {
841   unsigned Count = 0;
842   unsigned i = 0;
843   for (; i < getNumWords() && pVal[i] == -1ULL; ++i)
844     Count += APINT_BITS_PER_WORD;
845   if (i < getNumWords())
846     Count += CountTrailingOnes_64(pVal[i]);
847   return std::min(Count, BitWidth);
848 }
849 
850 unsigned APInt::countPopulationSlowCase() const {
851   unsigned Count = 0;
852   for (unsigned i = 0; i < getNumWords(); ++i)
853     Count += CountPopulation_64(pVal[i]);
854   return Count;
855 }
856 
857 APInt APInt::byteSwap() const {
858   assert(BitWidth >= 16 && BitWidth % 16 == 0 && "Cannot byteswap!");
859   if (BitWidth == 16)
860     return APInt(BitWidth, ByteSwap_16(uint16_t(VAL)));
861   else if (BitWidth == 32)
862     return APInt(BitWidth, ByteSwap_32(unsigned(VAL)));
863   else if (BitWidth == 48) {
864     unsigned Tmp1 = unsigned(VAL >> 16);
865     Tmp1 = ByteSwap_32(Tmp1);
866     uint16_t Tmp2 = uint16_t(VAL);
867     Tmp2 = ByteSwap_16(Tmp2);
868     return APInt(BitWidth, (uint64_t(Tmp2) << 32) | Tmp1);
869   } else if (BitWidth == 64)
870     return APInt(BitWidth, ByteSwap_64(VAL));
871   else {
872     APInt Result(BitWidth, 0);
873     char *pByte = (char*)Result.pVal;
874     for (unsigned i = 0; i < BitWidth / APINT_WORD_SIZE / 2; ++i) {
875       char Tmp = pByte[i];
876       pByte[i] = pByte[BitWidth / APINT_WORD_SIZE - 1 - i];
877       pByte[BitWidth / APINT_WORD_SIZE - i - 1] = Tmp;
878     }
879     return Result;
880   }
881 }
882 
883 APInt llvm::APIntOps::GreatestCommonDivisor(const APInt& API1,
884                                             const APInt& API2) {
885   APInt A = API1, B = API2;
886   while (!!B) {
887     APInt T = B;
888     B = APIntOps::urem(A, B);
889     A = T;
890   }
891   return A;
892 }
893 
894 APInt llvm::APIntOps::RoundDoubleToAPInt(double Double, unsigned width) {
895   union {
896     double D;
897     uint64_t I;
898   } T;
899   T.D = Double;
900 
901   // Get the sign bit from the highest order bit
902   bool isNeg = T.I >> 63;
903 
904   // Get the 11-bit exponent and adjust for the 1023 bit bias
905   int64_t exp = ((T.I >> 52) & 0x7ff) - 1023;
906 
907   // If the exponent is negative, the value is < 0 so just return 0.
908   if (exp < 0)
909     return APInt(width, 0u);
910 
911   // Extract the mantissa by clearing the top 12 bits (sign + exponent).
912   uint64_t mantissa = (T.I & (~0ULL >> 12)) | 1ULL << 52;
913 
914   // If the exponent doesn't shift all bits out of the mantissa
915   if (exp < 52)
916     return isNeg ? -APInt(width, mantissa >> (52 - exp)) :
917                     APInt(width, mantissa >> (52 - exp));
918 
919   // If the client didn't provide enough bits for us to shift the mantissa into
920   // then the result is undefined, just return 0
921   if (width <= exp - 52)
922     return APInt(width, 0);
923 
924   // Otherwise, we have to shift the mantissa bits up to the right location
925   APInt Tmp(width, mantissa);
926   Tmp = Tmp.shl((unsigned)exp - 52);
927   return isNeg ? -Tmp : Tmp;
928 }
929 
930 /// RoundToDouble - This function converts this APInt to a double.
931 /// The layout for double is as following (IEEE Standard 754):
932 ///  --------------------------------------
933 /// |  Sign    Exponent    Fraction    Bias |
934 /// |-------------------------------------- |
935 /// |  1[63]   11[62-52]   52[51-00]   1023 |
936 ///  --------------------------------------
937 double APInt::roundToDouble(bool isSigned) const {
938 
939   // Handle the simple case where the value is contained in one uint64_t.
940   // It is wrong to optimize getWord(0) to VAL; there might be more than one word.
941   if (isSingleWord() || getActiveBits() <= APINT_BITS_PER_WORD) {
942     if (isSigned) {
943       int64_t sext = (int64_t(getWord(0)) << (64-BitWidth)) >> (64-BitWidth);
944       return double(sext);
945     } else
946       return double(getWord(0));
947   }
948 
949   // Determine if the value is negative.
950   bool isNeg = isSigned ? (*this)[BitWidth-1] : false;
951 
952   // Construct the absolute value if we're negative.
953   APInt Tmp(isNeg ? -(*this) : (*this));
954 
955   // Figure out how many bits we're using.
956   unsigned n = Tmp.getActiveBits();
957 
958   // The exponent (without bias normalization) is just the number of bits
959   // we are using. Note that the sign bit is gone since we constructed the
960   // absolute value.
961   uint64_t exp = n;
962 
963   // Return infinity for exponent overflow
964   if (exp > 1023) {
965     if (!isSigned || !isNeg)
966       return std::numeric_limits<double>::infinity();
967     else
968       return -std::numeric_limits<double>::infinity();
969   }
970   exp += 1023; // Increment for 1023 bias
971 
972   // Number of bits in mantissa is 52. To obtain the mantissa value, we must
973   // extract the high 52 bits from the correct words in pVal.
974   uint64_t mantissa;
975   unsigned hiWord = whichWord(n-1);
976   if (hiWord == 0) {
977     mantissa = Tmp.pVal[0];
978     if (n > 52)
979       mantissa >>= n - 52; // shift down, we want the top 52 bits.
980   } else {
981     assert(hiWord > 0 && "huh?");
982     uint64_t hibits = Tmp.pVal[hiWord] << (52 - n % APINT_BITS_PER_WORD);
983     uint64_t lobits = Tmp.pVal[hiWord-1] >> (11 + n % APINT_BITS_PER_WORD);
984     mantissa = hibits | lobits;
985   }
986 
987   // The leading bit of mantissa is implicit, so get rid of it.
988   uint64_t sign = isNeg ? (1ULL << (APINT_BITS_PER_WORD - 1)) : 0;
989   union {
990     double D;
991     uint64_t I;
992   } T;
993   T.I = sign | (exp << 52) | mantissa;
994   return T.D;
995 }
996 
997 // Truncate to new width.
998 APInt APInt::trunc(unsigned width) const {
999   assert(width < BitWidth && "Invalid APInt Truncate request");
1000   assert(width && "Can't truncate to 0 bits");
1001 
1002   if (width <= APINT_BITS_PER_WORD)
1003     return APInt(width, getRawData()[0]);
1004 
1005   APInt Result(getMemory(getNumWords(width)), width);
1006 
1007   // Copy full words.
1008   unsigned i;
1009   for (i = 0; i != width / APINT_BITS_PER_WORD; i++)
1010     Result.pVal[i] = pVal[i];
1011 
1012   // Truncate and copy any partial word.
1013   unsigned bits = (0 - width) % APINT_BITS_PER_WORD;
1014   if (bits != 0)
1015     Result.pVal[i] = pVal[i] << bits >> bits;
1016 
1017   return Result;
1018 }
1019 
1020 // Sign extend to a new width.
1021 APInt APInt::sext(unsigned width) const {
1022   assert(width > BitWidth && "Invalid APInt SignExtend request");
1023 
1024   if (width <= APINT_BITS_PER_WORD) {
1025     uint64_t val = VAL << (APINT_BITS_PER_WORD - BitWidth);
1026     val = (int64_t)val >> (width - BitWidth);
1027     return APInt(width, val >> (APINT_BITS_PER_WORD - width));
1028   }
1029 
1030   APInt Result(getMemory(getNumWords(width)), width);
1031 
1032   // Copy full words.
1033   unsigned i;
1034   uint64_t word = 0;
1035   for (i = 0; i != BitWidth / APINT_BITS_PER_WORD; i++) {
1036     word = getRawData()[i];
1037     Result.pVal[i] = word;
1038   }
1039 
1040   // Read and sign-extend any partial word.
1041   unsigned bits = (0 - BitWidth) % APINT_BITS_PER_WORD;
1042   if (bits != 0)
1043     word = (int64_t)getRawData()[i] << bits >> bits;
1044   else
1045     word = (int64_t)word >> (APINT_BITS_PER_WORD - 1);
1046 
1047   // Write remaining full words.
1048   for (; i != width / APINT_BITS_PER_WORD; i++) {
1049     Result.pVal[i] = word;
1050     word = (int64_t)word >> (APINT_BITS_PER_WORD - 1);
1051   }
1052 
1053   // Write any partial word.
1054   bits = (0 - width) % APINT_BITS_PER_WORD;
1055   if (bits != 0)
1056     Result.pVal[i] = word << bits >> bits;
1057 
1058   return Result;
1059 }
1060 
1061 //  Zero extend to a new width.
1062 APInt APInt::zext(unsigned width) const {
1063   assert(width > BitWidth && "Invalid APInt ZeroExtend request");
1064 
1065   if (width <= APINT_BITS_PER_WORD)
1066     return APInt(width, VAL);
1067 
1068   APInt Result(getMemory(getNumWords(width)), width);
1069 
1070   // Copy words.
1071   unsigned i;
1072   for (i = 0; i != getNumWords(); i++)
1073     Result.pVal[i] = getRawData()[i];
1074 
1075   // Zero remaining words.
1076   memset(&Result.pVal[i], 0, (Result.getNumWords() - i) * APINT_WORD_SIZE);
1077 
1078   return Result;
1079 }
1080 
1081 APInt APInt::zextOrTrunc(unsigned width) const {
1082   if (BitWidth < width)
1083     return zext(width);
1084   if (BitWidth > width)
1085     return trunc(width);
1086   return *this;
1087 }
1088 
1089 APInt APInt::sextOrTrunc(unsigned width) const {
1090   if (BitWidth < width)
1091     return sext(width);
1092   if (BitWidth > width)
1093     return trunc(width);
1094   return *this;
1095 }
1096 
1097 /// Arithmetic right-shift this APInt by shiftAmt.
1098 /// @brief Arithmetic right-shift function.
1099 APInt APInt::ashr(const APInt &shiftAmt) const {
1100   return ashr((unsigned)shiftAmt.getLimitedValue(BitWidth));
1101 }
1102 
1103 /// Arithmetic right-shift this APInt by shiftAmt.
1104 /// @brief Arithmetic right-shift function.
1105 APInt APInt::ashr(unsigned shiftAmt) const {
1106   assert(shiftAmt <= BitWidth && "Invalid shift amount");
1107   // Handle a degenerate case
1108   if (shiftAmt == 0)
1109     return *this;
1110 
1111   // Handle single word shifts with built-in ashr
1112   if (isSingleWord()) {
1113     if (shiftAmt == BitWidth)
1114       return APInt(BitWidth, 0); // undefined
1115     else {
1116       unsigned SignBit = APINT_BITS_PER_WORD - BitWidth;
1117       return APInt(BitWidth,
1118         (((int64_t(VAL) << SignBit) >> SignBit) >> shiftAmt));
1119     }
1120   }
1121 
1122   // If all the bits were shifted out, the result is, technically, undefined.
1123   // We return -1 if it was negative, 0 otherwise. We check this early to avoid
1124   // issues in the algorithm below.
1125   if (shiftAmt == BitWidth) {
1126     if (isNegative())
1127       return APInt(BitWidth, -1ULL, true);
1128     else
1129       return APInt(BitWidth, 0);
1130   }
1131 
1132   // Create some space for the result.
1133   uint64_t * val = new uint64_t[getNumWords()];
1134 
1135   // Compute some values needed by the following shift algorithms
1136   unsigned wordShift = shiftAmt % APINT_BITS_PER_WORD; // bits to shift per word
1137   unsigned offset = shiftAmt / APINT_BITS_PER_WORD; // word offset for shift
1138   unsigned breakWord = getNumWords() - 1 - offset; // last word affected
1139   unsigned bitsInWord = whichBit(BitWidth); // how many bits in last word?
1140   if (bitsInWord == 0)
1141     bitsInWord = APINT_BITS_PER_WORD;
1142 
1143   // If we are shifting whole words, just move whole words
1144   if (wordShift == 0) {
1145     // Move the words containing significant bits
1146     for (unsigned i = 0; i <= breakWord; ++i)
1147       val[i] = pVal[i+offset]; // move whole word
1148 
1149     // Adjust the top significant word for sign bit fill, if negative
1150     if (isNegative())
1151       if (bitsInWord < APINT_BITS_PER_WORD)
1152         val[breakWord] |= ~0ULL << bitsInWord; // set high bits
1153   } else {
1154     // Shift the low order words
1155     for (unsigned i = 0; i < breakWord; ++i) {
1156       // This combines the shifted corresponding word with the low bits from
1157       // the next word (shifted into this word's high bits).
1158       val[i] = (pVal[i+offset] >> wordShift) |
1159                (pVal[i+offset+1] << (APINT_BITS_PER_WORD - wordShift));
1160     }
1161 
1162     // Shift the break word. In this case there are no bits from the next word
1163     // to include in this word.
1164     val[breakWord] = pVal[breakWord+offset] >> wordShift;
1165 
1166     // Deal with sign extenstion in the break word, and possibly the word before
1167     // it.
1168     if (isNegative()) {
1169       if (wordShift > bitsInWord) {
1170         if (breakWord > 0)
1171           val[breakWord-1] |=
1172             ~0ULL << (APINT_BITS_PER_WORD - (wordShift - bitsInWord));
1173         val[breakWord] |= ~0ULL;
1174       } else
1175         val[breakWord] |= (~0ULL << (bitsInWord - wordShift));
1176     }
1177   }
1178 
1179   // Remaining words are 0 or -1, just assign them.
1180   uint64_t fillValue = (isNegative() ? -1ULL : 0);
1181   for (unsigned i = breakWord+1; i < getNumWords(); ++i)
1182     val[i] = fillValue;
1183   return APInt(val, BitWidth).clearUnusedBits();
1184 }
1185 
1186 /// Logical right-shift this APInt by shiftAmt.
1187 /// @brief Logical right-shift function.
1188 APInt APInt::lshr(const APInt &shiftAmt) const {
1189   return lshr((unsigned)shiftAmt.getLimitedValue(BitWidth));
1190 }
1191 
1192 /// Logical right-shift this APInt by shiftAmt.
1193 /// @brief Logical right-shift function.
1194 APInt APInt::lshr(unsigned shiftAmt) const {
1195   if (isSingleWord()) {
1196     if (shiftAmt == BitWidth)
1197       return APInt(BitWidth, 0);
1198     else
1199       return APInt(BitWidth, this->VAL >> shiftAmt);
1200   }
1201 
1202   // If all the bits were shifted out, the result is 0. This avoids issues
1203   // with shifting by the size of the integer type, which produces undefined
1204   // results. We define these "undefined results" to always be 0.
1205   if (shiftAmt == BitWidth)
1206     return APInt(BitWidth, 0);
1207 
1208   // If none of the bits are shifted out, the result is *this. This avoids
1209   // issues with shifting by the size of the integer type, which produces
1210   // undefined results in the code below. This is also an optimization.
1211   if (shiftAmt == 0)
1212     return *this;
1213 
1214   // Create some space for the result.
1215   uint64_t * val = new uint64_t[getNumWords()];
1216 
1217   // If we are shifting less than a word, compute the shift with a simple carry
1218   if (shiftAmt < APINT_BITS_PER_WORD) {
1219     uint64_t carry = 0;
1220     for (int i = getNumWords()-1; i >= 0; --i) {
1221       val[i] = (pVal[i] >> shiftAmt) | carry;
1222       carry = pVal[i] << (APINT_BITS_PER_WORD - shiftAmt);
1223     }
1224     return APInt(val, BitWidth).clearUnusedBits();
1225   }
1226 
1227   // Compute some values needed by the remaining shift algorithms
1228   unsigned wordShift = shiftAmt % APINT_BITS_PER_WORD;
1229   unsigned offset = shiftAmt / APINT_BITS_PER_WORD;
1230 
1231   // If we are shifting whole words, just move whole words
1232   if (wordShift == 0) {
1233     for (unsigned i = 0; i < getNumWords() - offset; ++i)
1234       val[i] = pVal[i+offset];
1235     for (unsigned i = getNumWords()-offset; i < getNumWords(); i++)
1236       val[i] = 0;
1237     return APInt(val,BitWidth).clearUnusedBits();
1238   }
1239 
1240   // Shift the low order words
1241   unsigned breakWord = getNumWords() - offset -1;
1242   for (unsigned i = 0; i < breakWord; ++i)
1243     val[i] = (pVal[i+offset] >> wordShift) |
1244              (pVal[i+offset+1] << (APINT_BITS_PER_WORD - wordShift));
1245   // Shift the break word.
1246   val[breakWord] = pVal[breakWord+offset] >> wordShift;
1247 
1248   // Remaining words are 0
1249   for (unsigned i = breakWord+1; i < getNumWords(); ++i)
1250     val[i] = 0;
1251   return APInt(val, BitWidth).clearUnusedBits();
1252 }
1253 
1254 /// Left-shift this APInt by shiftAmt.
1255 /// @brief Left-shift function.
1256 APInt APInt::shl(const APInt &shiftAmt) const {
1257   // It's undefined behavior in C to shift by BitWidth or greater.
1258   return shl((unsigned)shiftAmt.getLimitedValue(BitWidth));
1259 }
1260 
1261 APInt APInt::shlSlowCase(unsigned shiftAmt) const {
1262   // If all the bits were shifted out, the result is 0. This avoids issues
1263   // with shifting by the size of the integer type, which produces undefined
1264   // results. We define these "undefined results" to always be 0.
1265   if (shiftAmt == BitWidth)
1266     return APInt(BitWidth, 0);
1267 
1268   // If none of the bits are shifted out, the result is *this. This avoids a
1269   // lshr by the words size in the loop below which can produce incorrect
1270   // results. It also avoids the expensive computation below for a common case.
1271   if (shiftAmt == 0)
1272     return *this;
1273 
1274   // Create some space for the result.
1275   uint64_t * val = new uint64_t[getNumWords()];
1276 
1277   // If we are shifting less than a word, do it the easy way
1278   if (shiftAmt < APINT_BITS_PER_WORD) {
1279     uint64_t carry = 0;
1280     for (unsigned i = 0; i < getNumWords(); i++) {
1281       val[i] = pVal[i] << shiftAmt | carry;
1282       carry = pVal[i] >> (APINT_BITS_PER_WORD - shiftAmt);
1283     }
1284     return APInt(val, BitWidth).clearUnusedBits();
1285   }
1286 
1287   // Compute some values needed by the remaining shift algorithms
1288   unsigned wordShift = shiftAmt % APINT_BITS_PER_WORD;
1289   unsigned offset = shiftAmt / APINT_BITS_PER_WORD;
1290 
1291   // If we are shifting whole words, just move whole words
1292   if (wordShift == 0) {
1293     for (unsigned i = 0; i < offset; i++)
1294       val[i] = 0;
1295     for (unsigned i = offset; i < getNumWords(); i++)
1296       val[i] = pVal[i-offset];
1297     return APInt(val,BitWidth).clearUnusedBits();
1298   }
1299 
1300   // Copy whole words from this to Result.
1301   unsigned i = getNumWords() - 1;
1302   for (; i > offset; --i)
1303     val[i] = pVal[i-offset] << wordShift |
1304              pVal[i-offset-1] >> (APINT_BITS_PER_WORD - wordShift);
1305   val[offset] = pVal[0] << wordShift;
1306   for (i = 0; i < offset; ++i)
1307     val[i] = 0;
1308   return APInt(val, BitWidth).clearUnusedBits();
1309 }
1310 
1311 APInt APInt::rotl(const APInt &rotateAmt) const {
1312   return rotl((unsigned)rotateAmt.getLimitedValue(BitWidth));
1313 }
1314 
1315 APInt APInt::rotl(unsigned rotateAmt) const {
1316   if (rotateAmt == 0)
1317     return *this;
1318   // Don't get too fancy, just use existing shift/or facilities
1319   APInt hi(*this);
1320   APInt lo(*this);
1321   hi.shl(rotateAmt);
1322   lo.lshr(BitWidth - rotateAmt);
1323   return hi | lo;
1324 }
1325 
1326 APInt APInt::rotr(const APInt &rotateAmt) const {
1327   return rotr((unsigned)rotateAmt.getLimitedValue(BitWidth));
1328 }
1329 
1330 APInt APInt::rotr(unsigned rotateAmt) const {
1331   if (rotateAmt == 0)
1332     return *this;
1333   // Don't get too fancy, just use existing shift/or facilities
1334   APInt hi(*this);
1335   APInt lo(*this);
1336   lo.lshr(rotateAmt);
1337   hi.shl(BitWidth - rotateAmt);
1338   return hi | lo;
1339 }
1340 
1341 // Square Root - this method computes and returns the square root of "this".
1342 // Three mechanisms are used for computation. For small values (<= 5 bits),
1343 // a table lookup is done. This gets some performance for common cases. For
1344 // values using less than 52 bits, the value is converted to double and then
1345 // the libc sqrt function is called. The result is rounded and then converted
1346 // back to a uint64_t which is then used to construct the result. Finally,
1347 // the Babylonian method for computing square roots is used.
1348 APInt APInt::sqrt() const {
1349 
1350   // Determine the magnitude of the value.
1351   unsigned magnitude = getActiveBits();
1352 
1353   // Use a fast table for some small values. This also gets rid of some
1354   // rounding errors in libc sqrt for small values.
1355   if (magnitude <= 5) {
1356     static const uint8_t results[32] = {
1357       /*     0 */ 0,
1358       /*  1- 2 */ 1, 1,
1359       /*  3- 6 */ 2, 2, 2, 2,
1360       /*  7-12 */ 3, 3, 3, 3, 3, 3,
1361       /* 13-20 */ 4, 4, 4, 4, 4, 4, 4, 4,
1362       /* 21-30 */ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1363       /*    31 */ 6
1364     };
1365     return APInt(BitWidth, results[ (isSingleWord() ? VAL : pVal[0]) ]);
1366   }
1367 
1368   // If the magnitude of the value fits in less than 52 bits (the precision of
1369   // an IEEE double precision floating point value), then we can use the
1370   // libc sqrt function which will probably use a hardware sqrt computation.
1371   // This should be faster than the algorithm below.
1372   if (magnitude < 52) {
1373 #if HAVE_ROUND
1374     return APInt(BitWidth,
1375                  uint64_t(::round(::sqrt(double(isSingleWord()?VAL:pVal[0])))));
1376 #else
1377     return APInt(BitWidth,
1378                  uint64_t(::sqrt(double(isSingleWord()?VAL:pVal[0]))) + 0.5);
1379 #endif
1380   }
1381 
1382   // Okay, all the short cuts are exhausted. We must compute it. The following
1383   // is a classical Babylonian method for computing the square root. This code
1384   // was adapted to APINt from a wikipedia article on such computations.
1385   // See http://www.wikipedia.org/ and go to the page named
1386   // Calculate_an_integer_square_root.
1387   unsigned nbits = BitWidth, i = 4;
1388   APInt testy(BitWidth, 16);
1389   APInt x_old(BitWidth, 1);
1390   APInt x_new(BitWidth, 0);
1391   APInt two(BitWidth, 2);
1392 
1393   // Select a good starting value using binary logarithms.
1394   for (;; i += 2, testy = testy.shl(2))
1395     if (i >= nbits || this->ule(testy)) {
1396       x_old = x_old.shl(i / 2);
1397       break;
1398     }
1399 
1400   // Use the Babylonian method to arrive at the integer square root:
1401   for (;;) {
1402     x_new = (this->udiv(x_old) + x_old).udiv(two);
1403     if (x_old.ule(x_new))
1404       break;
1405     x_old = x_new;
1406   }
1407 
1408   // Make sure we return the closest approximation
1409   // NOTE: The rounding calculation below is correct. It will produce an
1410   // off-by-one discrepancy with results from pari/gp. That discrepancy has been
1411   // determined to be a rounding issue with pari/gp as it begins to use a
1412   // floating point representation after 192 bits. There are no discrepancies
1413   // between this algorithm and pari/gp for bit widths < 192 bits.
1414   APInt square(x_old * x_old);
1415   APInt nextSquare((x_old + 1) * (x_old +1));
1416   if (this->ult(square))
1417     return x_old;
1418   else if (this->ule(nextSquare)) {
1419     APInt midpoint((nextSquare - square).udiv(two));
1420     APInt offset(*this - square);
1421     if (offset.ult(midpoint))
1422       return x_old;
1423     else
1424       return x_old + 1;
1425   } else
1426     llvm_unreachable("Error in APInt::sqrt computation");
1427   return x_old + 1;
1428 }
1429 
1430 /// Computes the multiplicative inverse of this APInt for a given modulo. The
1431 /// iterative extended Euclidean algorithm is used to solve for this value,
1432 /// however we simplify it to speed up calculating only the inverse, and take
1433 /// advantage of div+rem calculations. We also use some tricks to avoid copying
1434 /// (potentially large) APInts around.
1435 APInt APInt::multiplicativeInverse(const APInt& modulo) const {
1436   assert(ult(modulo) && "This APInt must be smaller than the modulo");
1437 
1438   // Using the properties listed at the following web page (accessed 06/21/08):
1439   //   http://www.numbertheory.org/php/euclid.html
1440   // (especially the properties numbered 3, 4 and 9) it can be proved that
1441   // BitWidth bits suffice for all the computations in the algorithm implemented
1442   // below. More precisely, this number of bits suffice if the multiplicative
1443   // inverse exists, but may not suffice for the general extended Euclidean
1444   // algorithm.
1445 
1446   APInt r[2] = { modulo, *this };
1447   APInt t[2] = { APInt(BitWidth, 0), APInt(BitWidth, 1) };
1448   APInt q(BitWidth, 0);
1449 
1450   unsigned i;
1451   for (i = 0; r[i^1] != 0; i ^= 1) {
1452     // An overview of the math without the confusing bit-flipping:
1453     // q = r[i-2] / r[i-1]
1454     // r[i] = r[i-2] % r[i-1]
1455     // t[i] = t[i-2] - t[i-1] * q
1456     udivrem(r[i], r[i^1], q, r[i]);
1457     t[i] -= t[i^1] * q;
1458   }
1459 
1460   // If this APInt and the modulo are not coprime, there is no multiplicative
1461   // inverse, so return 0. We check this by looking at the next-to-last
1462   // remainder, which is the gcd(*this,modulo) as calculated by the Euclidean
1463   // algorithm.
1464   if (r[i] != 1)
1465     return APInt(BitWidth, 0);
1466 
1467   // The next-to-last t is the multiplicative inverse.  However, we are
1468   // interested in a positive inverse. Calcuate a positive one from a negative
1469   // one if necessary. A simple addition of the modulo suffices because
1470   // abs(t[i]) is known to be less than *this/2 (see the link above).
1471   return t[i].isNegative() ? t[i] + modulo : t[i];
1472 }
1473 
1474 /// Calculate the magic numbers required to implement a signed integer division
1475 /// by a constant as a sequence of multiplies, adds and shifts.  Requires that
1476 /// the divisor not be 0, 1, or -1.  Taken from "Hacker's Delight", Henry S.
1477 /// Warren, Jr., chapter 10.
1478 APInt::ms APInt::magic() const {
1479   const APInt& d = *this;
1480   unsigned p;
1481   APInt ad, anc, delta, q1, r1, q2, r2, t;
1482   APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
1483   struct ms mag;
1484 
1485   ad = d.abs();
1486   t = signedMin + (d.lshr(d.getBitWidth() - 1));
1487   anc = t - 1 - t.urem(ad);   // absolute value of nc
1488   p = d.getBitWidth() - 1;    // initialize p
1489   q1 = signedMin.udiv(anc);   // initialize q1 = 2p/abs(nc)
1490   r1 = signedMin - q1*anc;    // initialize r1 = rem(2p,abs(nc))
1491   q2 = signedMin.udiv(ad);    // initialize q2 = 2p/abs(d)
1492   r2 = signedMin - q2*ad;     // initialize r2 = rem(2p,abs(d))
1493   do {
1494     p = p + 1;
1495     q1 = q1<<1;          // update q1 = 2p/abs(nc)
1496     r1 = r1<<1;          // update r1 = rem(2p/abs(nc))
1497     if (r1.uge(anc)) {  // must be unsigned comparison
1498       q1 = q1 + 1;
1499       r1 = r1 - anc;
1500     }
1501     q2 = q2<<1;          // update q2 = 2p/abs(d)
1502     r2 = r2<<1;          // update r2 = rem(2p/abs(d))
1503     if (r2.uge(ad)) {   // must be unsigned comparison
1504       q2 = q2 + 1;
1505       r2 = r2 - ad;
1506     }
1507     delta = ad - r2;
1508   } while (q1.ule(delta) || (q1 == delta && r1 == 0));
1509 
1510   mag.m = q2 + 1;
1511   if (d.isNegative()) mag.m = -mag.m;   // resulting magic number
1512   mag.s = p - d.getBitWidth();          // resulting shift
1513   return mag;
1514 }
1515 
1516 /// Calculate the magic numbers required to implement an unsigned integer
1517 /// division by a constant as a sequence of multiplies, adds and shifts.
1518 /// Requires that the divisor not be 0.  Taken from "Hacker's Delight", Henry
1519 /// S. Warren, Jr., chapter 10.
1520 APInt::mu APInt::magicu() const {
1521   const APInt& d = *this;
1522   unsigned p;
1523   APInt nc, delta, q1, r1, q2, r2;
1524   struct mu magu;
1525   magu.a = 0;               // initialize "add" indicator
1526   APInt allOnes = APInt::getAllOnesValue(d.getBitWidth());
1527   APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
1528   APInt signedMax = APInt::getSignedMaxValue(d.getBitWidth());
1529 
1530   nc = allOnes - (-d).urem(d);
1531   p = d.getBitWidth() - 1;  // initialize p
1532   q1 = signedMin.udiv(nc);  // initialize q1 = 2p/nc
1533   r1 = signedMin - q1*nc;   // initialize r1 = rem(2p,nc)
1534   q2 = signedMax.udiv(d);   // initialize q2 = (2p-1)/d
1535   r2 = signedMax - q2*d;    // initialize r2 = rem((2p-1),d)
1536   do {
1537     p = p + 1;
1538     if (r1.uge(nc - r1)) {
1539       q1 = q1 + q1 + 1;  // update q1
1540       r1 = r1 + r1 - nc; // update r1
1541     }
1542     else {
1543       q1 = q1+q1; // update q1
1544       r1 = r1+r1; // update r1
1545     }
1546     if ((r2 + 1).uge(d - r2)) {
1547       if (q2.uge(signedMax)) magu.a = 1;
1548       q2 = q2+q2 + 1;     // update q2
1549       r2 = r2+r2 + 1 - d; // update r2
1550     }
1551     else {
1552       if (q2.uge(signedMin)) magu.a = 1;
1553       q2 = q2+q2;     // update q2
1554       r2 = r2+r2 + 1; // update r2
1555     }
1556     delta = d - 1 - r2;
1557   } while (p < d.getBitWidth()*2 &&
1558            (q1.ult(delta) || (q1 == delta && r1 == 0)));
1559   magu.m = q2 + 1; // resulting magic number
1560   magu.s = p - d.getBitWidth();  // resulting shift
1561   return magu;
1562 }
1563 
1564 /// Implementation of Knuth's Algorithm D (Division of nonnegative integers)
1565 /// from "Art of Computer Programming, Volume 2", section 4.3.1, p. 272. The
1566 /// variables here have the same names as in the algorithm. Comments explain
1567 /// the algorithm and any deviation from it.
1568 static void KnuthDiv(unsigned *u, unsigned *v, unsigned *q, unsigned* r,
1569                      unsigned m, unsigned n) {
1570   assert(u && "Must provide dividend");
1571   assert(v && "Must provide divisor");
1572   assert(q && "Must provide quotient");
1573   assert(u != v && u != q && v != q && "Must us different memory");
1574   assert(n>1 && "n must be > 1");
1575 
1576   // Knuth uses the value b as the base of the number system. In our case b
1577   // is 2^31 so we just set it to -1u.
1578   uint64_t b = uint64_t(1) << 32;
1579 
1580 #if 0
1581   DEBUG(dbgs() << "KnuthDiv: m=" << m << " n=" << n << '\n');
1582   DEBUG(dbgs() << "KnuthDiv: original:");
1583   DEBUG(for (int i = m+n; i >=0; i--) dbgs() << " " << u[i]);
1584   DEBUG(dbgs() << " by");
1585   DEBUG(for (int i = n; i >0; i--) dbgs() << " " << v[i-1]);
1586   DEBUG(dbgs() << '\n');
1587 #endif
1588   // D1. [Normalize.] Set d = b / (v[n-1] + 1) and multiply all the digits of
1589   // u and v by d. Note that we have taken Knuth's advice here to use a power
1590   // of 2 value for d such that d * v[n-1] >= b/2 (b is the base). A power of
1591   // 2 allows us to shift instead of multiply and it is easy to determine the
1592   // shift amount from the leading zeros.  We are basically normalizing the u
1593   // and v so that its high bits are shifted to the top of v's range without
1594   // overflow. Note that this can require an extra word in u so that u must
1595   // be of length m+n+1.
1596   unsigned shift = CountLeadingZeros_32(v[n-1]);
1597   unsigned v_carry = 0;
1598   unsigned u_carry = 0;
1599   if (shift) {
1600     for (unsigned i = 0; i < m+n; ++i) {
1601       unsigned u_tmp = u[i] >> (32 - shift);
1602       u[i] = (u[i] << shift) | u_carry;
1603       u_carry = u_tmp;
1604     }
1605     for (unsigned i = 0; i < n; ++i) {
1606       unsigned v_tmp = v[i] >> (32 - shift);
1607       v[i] = (v[i] << shift) | v_carry;
1608       v_carry = v_tmp;
1609     }
1610   }
1611   u[m+n] = u_carry;
1612 #if 0
1613   DEBUG(dbgs() << "KnuthDiv:   normal:");
1614   DEBUG(for (int i = m+n; i >=0; i--) dbgs() << " " << u[i]);
1615   DEBUG(dbgs() << " by");
1616   DEBUG(for (int i = n; i >0; i--) dbgs() << " " << v[i-1]);
1617   DEBUG(dbgs() << '\n');
1618 #endif
1619 
1620   // D2. [Initialize j.]  Set j to m. This is the loop counter over the places.
1621   int j = m;
1622   do {
1623     DEBUG(dbgs() << "KnuthDiv: quotient digit #" << j << '\n');
1624     // D3. [Calculate q'.].
1625     //     Set qp = (u[j+n]*b + u[j+n-1]) / v[n-1]. (qp=qprime=q')
1626     //     Set rp = (u[j+n]*b + u[j+n-1]) % v[n-1]. (rp=rprime=r')
1627     // Now test if qp == b or qp*v[n-2] > b*rp + u[j+n-2]; if so, decrease
1628     // qp by 1, inrease rp by v[n-1], and repeat this test if rp < b. The test
1629     // on v[n-2] determines at high speed most of the cases in which the trial
1630     // value qp is one too large, and it eliminates all cases where qp is two
1631     // too large.
1632     uint64_t dividend = ((uint64_t(u[j+n]) << 32) + u[j+n-1]);
1633     DEBUG(dbgs() << "KnuthDiv: dividend == " << dividend << '\n');
1634     uint64_t qp = dividend / v[n-1];
1635     uint64_t rp = dividend % v[n-1];
1636     if (qp == b || qp*v[n-2] > b*rp + u[j+n-2]) {
1637       qp--;
1638       rp += v[n-1];
1639       if (rp < b && (qp == b || qp*v[n-2] > b*rp + u[j+n-2]))
1640         qp--;
1641     }
1642     DEBUG(dbgs() << "KnuthDiv: qp == " << qp << ", rp == " << rp << '\n');
1643 
1644     // D4. [Multiply and subtract.] Replace (u[j+n]u[j+n-1]...u[j]) with
1645     // (u[j+n]u[j+n-1]..u[j]) - qp * (v[n-1]...v[1]v[0]). This computation
1646     // consists of a simple multiplication by a one-place number, combined with
1647     // a subtraction.
1648     bool isNeg = false;
1649     for (unsigned i = 0; i < n; ++i) {
1650       uint64_t u_tmp = uint64_t(u[j+i]) | (uint64_t(u[j+i+1]) << 32);
1651       uint64_t subtrahend = uint64_t(qp) * uint64_t(v[i]);
1652       bool borrow = subtrahend > u_tmp;
1653       DEBUG(dbgs() << "KnuthDiv: u_tmp == " << u_tmp
1654                    << ", subtrahend == " << subtrahend
1655                    << ", borrow = " << borrow << '\n');
1656 
1657       uint64_t result = u_tmp - subtrahend;
1658       unsigned k = j + i;
1659       u[k++] = (unsigned)(result & (b-1)); // subtract low word
1660       u[k++] = (unsigned)(result >> 32);   // subtract high word
1661       while (borrow && k <= m+n) { // deal with borrow to the left
1662         borrow = u[k] == 0;
1663         u[k]--;
1664         k++;
1665       }
1666       isNeg |= borrow;
1667       DEBUG(dbgs() << "KnuthDiv: u[j+i] == " << u[j+i] << ",  u[j+i+1] == " <<
1668                     u[j+i+1] << '\n');
1669     }
1670     DEBUG(dbgs() << "KnuthDiv: after subtraction:");
1671     DEBUG(for (int i = m+n; i >=0; i--) dbgs() << " " << u[i]);
1672     DEBUG(dbgs() << '\n');
1673     // The digits (u[j+n]...u[j]) should be kept positive; if the result of
1674     // this step is actually negative, (u[j+n]...u[j]) should be left as the
1675     // true value plus b**(n+1), namely as the b's complement of
1676     // the true value, and a "borrow" to the left should be remembered.
1677     //
1678     if (isNeg) {
1679       bool carry = true;  // true because b's complement is "complement + 1"
1680       for (unsigned i = 0; i <= m+n; ++i) {
1681         u[i] = ~u[i] + carry; // b's complement
1682         carry = carry && u[i] == 0;
1683       }
1684     }
1685     DEBUG(dbgs() << "KnuthDiv: after complement:");
1686     DEBUG(for (int i = m+n; i >=0; i--) dbgs() << " " << u[i]);
1687     DEBUG(dbgs() << '\n');
1688 
1689     // D5. [Test remainder.] Set q[j] = qp. If the result of step D4 was
1690     // negative, go to step D6; otherwise go on to step D7.
1691     q[j] = (unsigned)qp;
1692     if (isNeg) {
1693       // D6. [Add back]. The probability that this step is necessary is very
1694       // small, on the order of only 2/b. Make sure that test data accounts for
1695       // this possibility. Decrease q[j] by 1
1696       q[j]--;
1697       // and add (0v[n-1]...v[1]v[0]) to (u[j+n]u[j+n-1]...u[j+1]u[j]).
1698       // A carry will occur to the left of u[j+n], and it should be ignored
1699       // since it cancels with the borrow that occurred in D4.
1700       bool carry = false;
1701       for (unsigned i = 0; i < n; i++) {
1702         unsigned limit = std::min(u[j+i],v[i]);
1703         u[j+i] += v[i] + carry;
1704         carry = u[j+i] < limit || (carry && u[j+i] == limit);
1705       }
1706       u[j+n] += carry;
1707     }
1708     DEBUG(dbgs() << "KnuthDiv: after correction:");
1709     DEBUG(for (int i = m+n; i >=0; i--) dbgs() <<" " << u[i]);
1710     DEBUG(dbgs() << "\nKnuthDiv: digit result = " << q[j] << '\n');
1711 
1712   // D7. [Loop on j.]  Decrease j by one. Now if j >= 0, go back to D3.
1713   } while (--j >= 0);
1714 
1715   DEBUG(dbgs() << "KnuthDiv: quotient:");
1716   DEBUG(for (int i = m; i >=0; i--) dbgs() <<" " << q[i]);
1717   DEBUG(dbgs() << '\n');
1718 
1719   // D8. [Unnormalize]. Now q[...] is the desired quotient, and the desired
1720   // remainder may be obtained by dividing u[...] by d. If r is non-null we
1721   // compute the remainder (urem uses this).
1722   if (r) {
1723     // The value d is expressed by the "shift" value above since we avoided
1724     // multiplication by d by using a shift left. So, all we have to do is
1725     // shift right here. In order to mak
1726     if (shift) {
1727       unsigned carry = 0;
1728       DEBUG(dbgs() << "KnuthDiv: remainder:");
1729       for (int i = n-1; i >= 0; i--) {
1730         r[i] = (u[i] >> shift) | carry;
1731         carry = u[i] << (32 - shift);
1732         DEBUG(dbgs() << " " << r[i]);
1733       }
1734     } else {
1735       for (int i = n-1; i >= 0; i--) {
1736         r[i] = u[i];
1737         DEBUG(dbgs() << " " << r[i]);
1738       }
1739     }
1740     DEBUG(dbgs() << '\n');
1741   }
1742 #if 0
1743   DEBUG(dbgs() << '\n');
1744 #endif
1745 }
1746 
1747 void APInt::divide(const APInt LHS, unsigned lhsWords,
1748                    const APInt &RHS, unsigned rhsWords,
1749                    APInt *Quotient, APInt *Remainder)
1750 {
1751   assert(lhsWords >= rhsWords && "Fractional result");
1752 
1753   // First, compose the values into an array of 32-bit words instead of
1754   // 64-bit words. This is a necessity of both the "short division" algorithm
1755   // and the Knuth "classical algorithm" which requires there to be native
1756   // operations for +, -, and * on an m bit value with an m*2 bit result. We
1757   // can't use 64-bit operands here because we don't have native results of
1758   // 128-bits. Furthermore, casting the 64-bit values to 32-bit values won't
1759   // work on large-endian machines.
1760   uint64_t mask = ~0ull >> (sizeof(unsigned)*CHAR_BIT);
1761   unsigned n = rhsWords * 2;
1762   unsigned m = (lhsWords * 2) - n;
1763 
1764   // Allocate space for the temporary values we need either on the stack, if
1765   // it will fit, or on the heap if it won't.
1766   unsigned SPACE[128];
1767   unsigned *U = 0;
1768   unsigned *V = 0;
1769   unsigned *Q = 0;
1770   unsigned *R = 0;
1771   if ((Remainder?4:3)*n+2*m+1 <= 128) {
1772     U = &SPACE[0];
1773     V = &SPACE[m+n+1];
1774     Q = &SPACE[(m+n+1) + n];
1775     if (Remainder)
1776       R = &SPACE[(m+n+1) + n + (m+n)];
1777   } else {
1778     U = new unsigned[m + n + 1];
1779     V = new unsigned[n];
1780     Q = new unsigned[m+n];
1781     if (Remainder)
1782       R = new unsigned[n];
1783   }
1784 
1785   // Initialize the dividend
1786   memset(U, 0, (m+n+1)*sizeof(unsigned));
1787   for (unsigned i = 0; i < lhsWords; ++i) {
1788     uint64_t tmp = (LHS.getNumWords() == 1 ? LHS.VAL : LHS.pVal[i]);
1789     U[i * 2] = (unsigned)(tmp & mask);
1790     U[i * 2 + 1] = (unsigned)(tmp >> (sizeof(unsigned)*CHAR_BIT));
1791   }
1792   U[m+n] = 0; // this extra word is for "spill" in the Knuth algorithm.
1793 
1794   // Initialize the divisor
1795   memset(V, 0, (n)*sizeof(unsigned));
1796   for (unsigned i = 0; i < rhsWords; ++i) {
1797     uint64_t tmp = (RHS.getNumWords() == 1 ? RHS.VAL : RHS.pVal[i]);
1798     V[i * 2] = (unsigned)(tmp & mask);
1799     V[i * 2 + 1] = (unsigned)(tmp >> (sizeof(unsigned)*CHAR_BIT));
1800   }
1801 
1802   // initialize the quotient and remainder
1803   memset(Q, 0, (m+n) * sizeof(unsigned));
1804   if (Remainder)
1805     memset(R, 0, n * sizeof(unsigned));
1806 
1807   // Now, adjust m and n for the Knuth division. n is the number of words in
1808   // the divisor. m is the number of words by which the dividend exceeds the
1809   // divisor (i.e. m+n is the length of the dividend). These sizes must not
1810   // contain any zero words or the Knuth algorithm fails.
1811   for (unsigned i = n; i > 0 && V[i-1] == 0; i--) {
1812     n--;
1813     m++;
1814   }
1815   for (unsigned i = m+n; i > 0 && U[i-1] == 0; i--)
1816     m--;
1817 
1818   // If we're left with only a single word for the divisor, Knuth doesn't work
1819   // so we implement the short division algorithm here. This is much simpler
1820   // and faster because we are certain that we can divide a 64-bit quantity
1821   // by a 32-bit quantity at hardware speed and short division is simply a
1822   // series of such operations. This is just like doing short division but we
1823   // are using base 2^32 instead of base 10.
1824   assert(n != 0 && "Divide by zero?");
1825   if (n == 1) {
1826     unsigned divisor = V[0];
1827     unsigned remainder = 0;
1828     for (int i = m+n-1; i >= 0; i--) {
1829       uint64_t partial_dividend = uint64_t(remainder) << 32 | U[i];
1830       if (partial_dividend == 0) {
1831         Q[i] = 0;
1832         remainder = 0;
1833       } else if (partial_dividend < divisor) {
1834         Q[i] = 0;
1835         remainder = (unsigned)partial_dividend;
1836       } else if (partial_dividend == divisor) {
1837         Q[i] = 1;
1838         remainder = 0;
1839       } else {
1840         Q[i] = (unsigned)(partial_dividend / divisor);
1841         remainder = (unsigned)(partial_dividend - (Q[i] * divisor));
1842       }
1843     }
1844     if (R)
1845       R[0] = remainder;
1846   } else {
1847     // Now we're ready to invoke the Knuth classical divide algorithm. In this
1848     // case n > 1.
1849     KnuthDiv(U, V, Q, R, m, n);
1850   }
1851 
1852   // If the caller wants the quotient
1853   if (Quotient) {
1854     // Set up the Quotient value's memory.
1855     if (Quotient->BitWidth != LHS.BitWidth) {
1856       if (Quotient->isSingleWord())
1857         Quotient->VAL = 0;
1858       else
1859         delete [] Quotient->pVal;
1860       Quotient->BitWidth = LHS.BitWidth;
1861       if (!Quotient->isSingleWord())
1862         Quotient->pVal = getClearedMemory(Quotient->getNumWords());
1863     } else
1864       Quotient->clearAllBits();
1865 
1866     // The quotient is in Q. Reconstitute the quotient into Quotient's low
1867     // order words.
1868     if (lhsWords == 1) {
1869       uint64_t tmp =
1870         uint64_t(Q[0]) | (uint64_t(Q[1]) << (APINT_BITS_PER_WORD / 2));
1871       if (Quotient->isSingleWord())
1872         Quotient->VAL = tmp;
1873       else
1874         Quotient->pVal[0] = tmp;
1875     } else {
1876       assert(!Quotient->isSingleWord() && "Quotient APInt not large enough");
1877       for (unsigned i = 0; i < lhsWords; ++i)
1878         Quotient->pVal[i] =
1879           uint64_t(Q[i*2]) | (uint64_t(Q[i*2+1]) << (APINT_BITS_PER_WORD / 2));
1880     }
1881   }
1882 
1883   // If the caller wants the remainder
1884   if (Remainder) {
1885     // Set up the Remainder value's memory.
1886     if (Remainder->BitWidth != RHS.BitWidth) {
1887       if (Remainder->isSingleWord())
1888         Remainder->VAL = 0;
1889       else
1890         delete [] Remainder->pVal;
1891       Remainder->BitWidth = RHS.BitWidth;
1892       if (!Remainder->isSingleWord())
1893         Remainder->pVal = getClearedMemory(Remainder->getNumWords());
1894     } else
1895       Remainder->clearAllBits();
1896 
1897     // The remainder is in R. Reconstitute the remainder into Remainder's low
1898     // order words.
1899     if (rhsWords == 1) {
1900       uint64_t tmp =
1901         uint64_t(R[0]) | (uint64_t(R[1]) << (APINT_BITS_PER_WORD / 2));
1902       if (Remainder->isSingleWord())
1903         Remainder->VAL = tmp;
1904       else
1905         Remainder->pVal[0] = tmp;
1906     } else {
1907       assert(!Remainder->isSingleWord() && "Remainder APInt not large enough");
1908       for (unsigned i = 0; i < rhsWords; ++i)
1909         Remainder->pVal[i] =
1910           uint64_t(R[i*2]) | (uint64_t(R[i*2+1]) << (APINT_BITS_PER_WORD / 2));
1911     }
1912   }
1913 
1914   // Clean up the memory we allocated.
1915   if (U != &SPACE[0]) {
1916     delete [] U;
1917     delete [] V;
1918     delete [] Q;
1919     delete [] R;
1920   }
1921 }
1922 
1923 APInt APInt::udiv(const APInt& RHS) const {
1924   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
1925 
1926   // First, deal with the easy case
1927   if (isSingleWord()) {
1928     assert(RHS.VAL != 0 && "Divide by zero?");
1929     return APInt(BitWidth, VAL / RHS.VAL);
1930   }
1931 
1932   // Get some facts about the LHS and RHS number of bits and words
1933   unsigned rhsBits = RHS.getActiveBits();
1934   unsigned rhsWords = !rhsBits ? 0 : (APInt::whichWord(rhsBits - 1) + 1);
1935   assert(rhsWords && "Divided by zero???");
1936   unsigned lhsBits = this->getActiveBits();
1937   unsigned lhsWords = !lhsBits ? 0 : (APInt::whichWord(lhsBits - 1) + 1);
1938 
1939   // Deal with some degenerate cases
1940   if (!lhsWords)
1941     // 0 / X ===> 0
1942     return APInt(BitWidth, 0);
1943   else if (lhsWords < rhsWords || this->ult(RHS)) {
1944     // X / Y ===> 0, iff X < Y
1945     return APInt(BitWidth, 0);
1946   } else if (*this == RHS) {
1947     // X / X ===> 1
1948     return APInt(BitWidth, 1);
1949   } else if (lhsWords == 1 && rhsWords == 1) {
1950     // All high words are zero, just use native divide
1951     return APInt(BitWidth, this->pVal[0] / RHS.pVal[0]);
1952   }
1953 
1954   // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1955   APInt Quotient(1,0); // to hold result.
1956   divide(*this, lhsWords, RHS, rhsWords, &Quotient, 0);
1957   return Quotient;
1958 }
1959 
1960 APInt APInt::urem(const APInt& RHS) const {
1961   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
1962   if (isSingleWord()) {
1963     assert(RHS.VAL != 0 && "Remainder by zero?");
1964     return APInt(BitWidth, VAL % RHS.VAL);
1965   }
1966 
1967   // Get some facts about the LHS
1968   unsigned lhsBits = getActiveBits();
1969   unsigned lhsWords = !lhsBits ? 0 : (whichWord(lhsBits - 1) + 1);
1970 
1971   // Get some facts about the RHS
1972   unsigned rhsBits = RHS.getActiveBits();
1973   unsigned rhsWords = !rhsBits ? 0 : (APInt::whichWord(rhsBits - 1) + 1);
1974   assert(rhsWords && "Performing remainder operation by zero ???");
1975 
1976   // Check the degenerate cases
1977   if (lhsWords == 0) {
1978     // 0 % Y ===> 0
1979     return APInt(BitWidth, 0);
1980   } else if (lhsWords < rhsWords || this->ult(RHS)) {
1981     // X % Y ===> X, iff X < Y
1982     return *this;
1983   } else if (*this == RHS) {
1984     // X % X == 0;
1985     return APInt(BitWidth, 0);
1986   } else if (lhsWords == 1) {
1987     // All high words are zero, just use native remainder
1988     return APInt(BitWidth, pVal[0] % RHS.pVal[0]);
1989   }
1990 
1991   // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1992   APInt Remainder(1,0);
1993   divide(*this, lhsWords, RHS, rhsWords, 0, &Remainder);
1994   return Remainder;
1995 }
1996 
1997 void APInt::udivrem(const APInt &LHS, const APInt &RHS,
1998                     APInt &Quotient, APInt &Remainder) {
1999   // Get some size facts about the dividend and divisor
2000   unsigned lhsBits  = LHS.getActiveBits();
2001   unsigned lhsWords = !lhsBits ? 0 : (APInt::whichWord(lhsBits - 1) + 1);
2002   unsigned rhsBits  = RHS.getActiveBits();
2003   unsigned rhsWords = !rhsBits ? 0 : (APInt::whichWord(rhsBits - 1) + 1);
2004 
2005   // Check the degenerate cases
2006   if (lhsWords == 0) {
2007     Quotient = 0;                // 0 / Y ===> 0
2008     Remainder = 0;               // 0 % Y ===> 0
2009     return;
2010   }
2011 
2012   if (lhsWords < rhsWords || LHS.ult(RHS)) {
2013     Remainder = LHS;            // X % Y ===> X, iff X < Y
2014     Quotient = 0;               // X / Y ===> 0, iff X < Y
2015     return;
2016   }
2017 
2018   if (LHS == RHS) {
2019     Quotient  = 1;              // X / X ===> 1
2020     Remainder = 0;              // X % X ===> 0;
2021     return;
2022   }
2023 
2024   if (lhsWords == 1 && rhsWords == 1) {
2025     // There is only one word to consider so use the native versions.
2026     uint64_t lhsValue = LHS.isSingleWord() ? LHS.VAL : LHS.pVal[0];
2027     uint64_t rhsValue = RHS.isSingleWord() ? RHS.VAL : RHS.pVal[0];
2028     Quotient = APInt(LHS.getBitWidth(), lhsValue / rhsValue);
2029     Remainder = APInt(LHS.getBitWidth(), lhsValue % rhsValue);
2030     return;
2031   }
2032 
2033   // Okay, lets do it the long way
2034   divide(LHS, lhsWords, RHS, rhsWords, &Quotient, &Remainder);
2035 }
2036 
2037 APInt APInt::sadd_ov(const APInt &RHS, bool &Overflow) const {
2038   APInt Res = *this+RHS;
2039   Overflow = isNonNegative() == RHS.isNonNegative() &&
2040              Res.isNonNegative() != isNonNegative();
2041   return Res;
2042 }
2043 
2044 APInt APInt::uadd_ov(const APInt &RHS, bool &Overflow) const {
2045   APInt Res = *this+RHS;
2046   Overflow = Res.ult(RHS);
2047   return Res;
2048 }
2049 
2050 APInt APInt::ssub_ov(const APInt &RHS, bool &Overflow) const {
2051   APInt Res = *this - RHS;
2052   Overflow = isNonNegative() != RHS.isNonNegative() &&
2053              Res.isNonNegative() != isNonNegative();
2054   return Res;
2055 }
2056 
2057 APInt APInt::usub_ov(const APInt &RHS, bool &Overflow) const {
2058   APInt Res = *this-RHS;
2059   Overflow = Res.ugt(*this);
2060   return Res;
2061 }
2062 
2063 APInt APInt::sdiv_ov(const APInt &RHS, bool &Overflow) const {
2064   // MININT/-1  -->  overflow.
2065   Overflow = isMinSignedValue() && RHS.isAllOnesValue();
2066   return sdiv(RHS);
2067 }
2068 
2069 APInt APInt::smul_ov(const APInt &RHS, bool &Overflow) const {
2070   APInt Res = *this * RHS;
2071 
2072   if (*this != 0 && RHS != 0)
2073     Overflow = Res.sdiv(RHS) != *this || Res.sdiv(*this) != RHS;
2074   else
2075     Overflow = false;
2076   return Res;
2077 }
2078 
2079 APInt APInt::sshl_ov(unsigned ShAmt, bool &Overflow) const {
2080   Overflow = ShAmt >= getBitWidth();
2081   if (Overflow)
2082     ShAmt = getBitWidth()-1;
2083 
2084   if (isNonNegative()) // Don't allow sign change.
2085     Overflow = ShAmt >= countLeadingZeros();
2086   else
2087     Overflow = ShAmt >= countLeadingOnes();
2088 
2089   return *this << ShAmt;
2090 }
2091 
2092 
2093 
2094 
2095 void APInt::fromString(unsigned numbits, StringRef str, uint8_t radix) {
2096   // Check our assumptions here
2097   assert(!str.empty() && "Invalid string length");
2098   assert((radix == 10 || radix == 8 || radix == 16 || radix == 2) &&
2099          "Radix should be 2, 8, 10, or 16!");
2100 
2101   StringRef::iterator p = str.begin();
2102   size_t slen = str.size();
2103   bool isNeg = *p == '-';
2104   if (*p == '-' || *p == '+') {
2105     p++;
2106     slen--;
2107     assert(slen && "String is only a sign, needs a value.");
2108   }
2109   assert((slen <= numbits || radix != 2) && "Insufficient bit width");
2110   assert(((slen-1)*3 <= numbits || radix != 8) && "Insufficient bit width");
2111   assert(((slen-1)*4 <= numbits || radix != 16) && "Insufficient bit width");
2112   assert((((slen-1)*64)/22 <= numbits || radix != 10) &&
2113          "Insufficient bit width");
2114 
2115   // Allocate memory
2116   if (!isSingleWord())
2117     pVal = getClearedMemory(getNumWords());
2118 
2119   // Figure out if we can shift instead of multiply
2120   unsigned shift = (radix == 16 ? 4 : radix == 8 ? 3 : radix == 2 ? 1 : 0);
2121 
2122   // Set up an APInt for the digit to add outside the loop so we don't
2123   // constantly construct/destruct it.
2124   APInt apdigit(getBitWidth(), 0);
2125   APInt apradix(getBitWidth(), radix);
2126 
2127   // Enter digit traversal loop
2128   for (StringRef::iterator e = str.end(); p != e; ++p) {
2129     unsigned digit = getDigit(*p, radix);
2130     assert(digit < radix && "Invalid character in digit string");
2131 
2132     // Shift or multiply the value by the radix
2133     if (slen > 1) {
2134       if (shift)
2135         *this <<= shift;
2136       else
2137         *this *= apradix;
2138     }
2139 
2140     // Add in the digit we just interpreted
2141     if (apdigit.isSingleWord())
2142       apdigit.VAL = digit;
2143     else
2144       apdigit.pVal[0] = digit;
2145     *this += apdigit;
2146   }
2147   // If its negative, put it in two's complement form
2148   if (isNeg) {
2149     (*this)--;
2150     this->flipAllBits();
2151   }
2152 }
2153 
2154 void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix,
2155                      bool Signed) const {
2156   assert((Radix == 10 || Radix == 8 || Radix == 16 || Radix == 2) &&
2157          "Radix should be 2, 8, 10, or 16!");
2158 
2159   // First, check for a zero value and just short circuit the logic below.
2160   if (*this == 0) {
2161     Str.push_back('0');
2162     return;
2163   }
2164 
2165   static const char Digits[] = "0123456789ABCDEF";
2166 
2167   if (isSingleWord()) {
2168     char Buffer[65];
2169     char *BufPtr = Buffer+65;
2170 
2171     uint64_t N;
2172     if (!Signed) {
2173       N = getZExtValue();
2174     } else {
2175       int64_t I = getSExtValue();
2176       if (I >= 0) {
2177         N = I;
2178       } else {
2179         Str.push_back('-');
2180         N = -(uint64_t)I;
2181       }
2182     }
2183 
2184     while (N) {
2185       *--BufPtr = Digits[N % Radix];
2186       N /= Radix;
2187     }
2188     Str.append(BufPtr, Buffer+65);
2189     return;
2190   }
2191 
2192   APInt Tmp(*this);
2193 
2194   if (Signed && isNegative()) {
2195     // They want to print the signed version and it is a negative value
2196     // Flip the bits and add one to turn it into the equivalent positive
2197     // value and put a '-' in the result.
2198     Tmp.flipAllBits();
2199     Tmp++;
2200     Str.push_back('-');
2201   }
2202 
2203   // We insert the digits backward, then reverse them to get the right order.
2204   unsigned StartDig = Str.size();
2205 
2206   // For the 2, 8 and 16 bit cases, we can just shift instead of divide
2207   // because the number of bits per digit (1, 3 and 4 respectively) divides
2208   // equaly.  We just shift until the value is zero.
2209   if (Radix != 10) {
2210     // Just shift tmp right for each digit width until it becomes zero
2211     unsigned ShiftAmt = (Radix == 16 ? 4 : (Radix == 8 ? 3 : 1));
2212     unsigned MaskAmt = Radix - 1;
2213 
2214     while (Tmp != 0) {
2215       unsigned Digit = unsigned(Tmp.getRawData()[0]) & MaskAmt;
2216       Str.push_back(Digits[Digit]);
2217       Tmp = Tmp.lshr(ShiftAmt);
2218     }
2219   } else {
2220     APInt divisor(4, 10);
2221     while (Tmp != 0) {
2222       APInt APdigit(1, 0);
2223       APInt tmp2(Tmp.getBitWidth(), 0);
2224       divide(Tmp, Tmp.getNumWords(), divisor, divisor.getNumWords(), &tmp2,
2225              &APdigit);
2226       unsigned Digit = (unsigned)APdigit.getZExtValue();
2227       assert(Digit < Radix && "divide failed");
2228       Str.push_back(Digits[Digit]);
2229       Tmp = tmp2;
2230     }
2231   }
2232 
2233   // Reverse the digits before returning.
2234   std::reverse(Str.begin()+StartDig, Str.end());
2235 }
2236 
2237 /// toString - This returns the APInt as a std::string.  Note that this is an
2238 /// inefficient method.  It is better to pass in a SmallVector/SmallString
2239 /// to the methods above.
2240 std::string APInt::toString(unsigned Radix = 10, bool Signed = true) const {
2241   SmallString<40> S;
2242   toString(S, Radix, Signed);
2243   return S.str();
2244 }
2245 
2246 
2247 void APInt::dump() const {
2248   SmallString<40> S, U;
2249   this->toStringUnsigned(U);
2250   this->toStringSigned(S);
2251   dbgs() << "APInt(" << BitWidth << "b, "
2252          << U.str() << "u " << S.str() << "s)";
2253 }
2254 
2255 void APInt::print(raw_ostream &OS, bool isSigned) const {
2256   SmallString<40> S;
2257   this->toString(S, 10, isSigned);
2258   OS << S.str();
2259 }
2260 
2261 // This implements a variety of operations on a representation of
2262 // arbitrary precision, two's-complement, bignum integer values.
2263 
2264 // Assumed by lowHalf, highHalf, partMSB and partLSB.  A fairly safe
2265 // and unrestricting assumption.
2266 #define COMPILE_TIME_ASSERT(cond) extern int CTAssert[(cond) ? 1 : -1]
2267 COMPILE_TIME_ASSERT(integerPartWidth % 2 == 0);
2268 
2269 /* Some handy functions local to this file.  */
2270 namespace {
2271 
2272   /* Returns the integer part with the least significant BITS set.
2273      BITS cannot be zero.  */
2274   static inline integerPart
2275   lowBitMask(unsigned int bits)
2276   {
2277     assert(bits != 0 && bits <= integerPartWidth);
2278 
2279     return ~(integerPart) 0 >> (integerPartWidth - bits);
2280   }
2281 
2282   /* Returns the value of the lower half of PART.  */
2283   static inline integerPart
2284   lowHalf(integerPart part)
2285   {
2286     return part & lowBitMask(integerPartWidth / 2);
2287   }
2288 
2289   /* Returns the value of the upper half of PART.  */
2290   static inline integerPart
2291   highHalf(integerPart part)
2292   {
2293     return part >> (integerPartWidth / 2);
2294   }
2295 
2296   /* Returns the bit number of the most significant set bit of a part.
2297      If the input number has no bits set -1U is returned.  */
2298   static unsigned int
2299   partMSB(integerPart value)
2300   {
2301     unsigned int n, msb;
2302 
2303     if (value == 0)
2304       return -1U;
2305 
2306     n = integerPartWidth / 2;
2307 
2308     msb = 0;
2309     do {
2310       if (value >> n) {
2311         value >>= n;
2312         msb += n;
2313       }
2314 
2315       n >>= 1;
2316     } while (n);
2317 
2318     return msb;
2319   }
2320 
2321   /* Returns the bit number of the least significant set bit of a
2322      part.  If the input number has no bits set -1U is returned.  */
2323   static unsigned int
2324   partLSB(integerPart value)
2325   {
2326     unsigned int n, lsb;
2327 
2328     if (value == 0)
2329       return -1U;
2330 
2331     lsb = integerPartWidth - 1;
2332     n = integerPartWidth / 2;
2333 
2334     do {
2335       if (value << n) {
2336         value <<= n;
2337         lsb -= n;
2338       }
2339 
2340       n >>= 1;
2341     } while (n);
2342 
2343     return lsb;
2344   }
2345 }
2346 
2347 /* Sets the least significant part of a bignum to the input value, and
2348    zeroes out higher parts.  */
2349 void
2350 APInt::tcSet(integerPart *dst, integerPart part, unsigned int parts)
2351 {
2352   unsigned int i;
2353 
2354   assert(parts > 0);
2355 
2356   dst[0] = part;
2357   for (i = 1; i < parts; i++)
2358     dst[i] = 0;
2359 }
2360 
2361 /* Assign one bignum to another.  */
2362 void
2363 APInt::tcAssign(integerPart *dst, const integerPart *src, unsigned int parts)
2364 {
2365   unsigned int i;
2366 
2367   for (i = 0; i < parts; i++)
2368     dst[i] = src[i];
2369 }
2370 
2371 /* Returns true if a bignum is zero, false otherwise.  */
2372 bool
2373 APInt::tcIsZero(const integerPart *src, unsigned int parts)
2374 {
2375   unsigned int i;
2376 
2377   for (i = 0; i < parts; i++)
2378     if (src[i])
2379       return false;
2380 
2381   return true;
2382 }
2383 
2384 /* Extract the given bit of a bignum; returns 0 or 1.  */
2385 int
2386 APInt::tcExtractBit(const integerPart *parts, unsigned int bit)
2387 {
2388   return (parts[bit / integerPartWidth] &
2389           ((integerPart) 1 << bit % integerPartWidth)) != 0;
2390 }
2391 
2392 /* Set the given bit of a bignum. */
2393 void
2394 APInt::tcSetBit(integerPart *parts, unsigned int bit)
2395 {
2396   parts[bit / integerPartWidth] |= (integerPart) 1 << (bit % integerPartWidth);
2397 }
2398 
2399 /* Clears the given bit of a bignum. */
2400 void
2401 APInt::tcClearBit(integerPart *parts, unsigned int bit)
2402 {
2403   parts[bit / integerPartWidth] &=
2404     ~((integerPart) 1 << (bit % integerPartWidth));
2405 }
2406 
2407 /* Returns the bit number of the least significant set bit of a
2408    number.  If the input number has no bits set -1U is returned.  */
2409 unsigned int
2410 APInt::tcLSB(const integerPart *parts, unsigned int n)
2411 {
2412   unsigned int i, lsb;
2413 
2414   for (i = 0; i < n; i++) {
2415       if (parts[i] != 0) {
2416           lsb = partLSB(parts[i]);
2417 
2418           return lsb + i * integerPartWidth;
2419       }
2420   }
2421 
2422   return -1U;
2423 }
2424 
2425 /* Returns the bit number of the most significant set bit of a number.
2426    If the input number has no bits set -1U is returned.  */
2427 unsigned int
2428 APInt::tcMSB(const integerPart *parts, unsigned int n)
2429 {
2430   unsigned int msb;
2431 
2432   do {
2433     --n;
2434 
2435     if (parts[n] != 0) {
2436       msb = partMSB(parts[n]);
2437 
2438       return msb + n * integerPartWidth;
2439     }
2440   } while (n);
2441 
2442   return -1U;
2443 }
2444 
2445 /* Copy the bit vector of width srcBITS from SRC, starting at bit
2446    srcLSB, to DST, of dstCOUNT parts, such that the bit srcLSB becomes
2447    the least significant bit of DST.  All high bits above srcBITS in
2448    DST are zero-filled.  */
2449 void
2450 APInt::tcExtract(integerPart *dst, unsigned int dstCount,const integerPart *src,
2451                  unsigned int srcBits, unsigned int srcLSB)
2452 {
2453   unsigned int firstSrcPart, dstParts, shift, n;
2454 
2455   dstParts = (srcBits + integerPartWidth - 1) / integerPartWidth;
2456   assert(dstParts <= dstCount);
2457 
2458   firstSrcPart = srcLSB / integerPartWidth;
2459   tcAssign (dst, src + firstSrcPart, dstParts);
2460 
2461   shift = srcLSB % integerPartWidth;
2462   tcShiftRight (dst, dstParts, shift);
2463 
2464   /* We now have (dstParts * integerPartWidth - shift) bits from SRC
2465      in DST.  If this is less that srcBits, append the rest, else
2466      clear the high bits.  */
2467   n = dstParts * integerPartWidth - shift;
2468   if (n < srcBits) {
2469     integerPart mask = lowBitMask (srcBits - n);
2470     dst[dstParts - 1] |= ((src[firstSrcPart + dstParts] & mask)
2471                           << n % integerPartWidth);
2472   } else if (n > srcBits) {
2473     if (srcBits % integerPartWidth)
2474       dst[dstParts - 1] &= lowBitMask (srcBits % integerPartWidth);
2475   }
2476 
2477   /* Clear high parts.  */
2478   while (dstParts < dstCount)
2479     dst[dstParts++] = 0;
2480 }
2481 
2482 /* DST += RHS + C where C is zero or one.  Returns the carry flag.  */
2483 integerPart
2484 APInt::tcAdd(integerPart *dst, const integerPart *rhs,
2485              integerPart c, unsigned int parts)
2486 {
2487   unsigned int i;
2488 
2489   assert(c <= 1);
2490 
2491   for (i = 0; i < parts; i++) {
2492     integerPart l;
2493 
2494     l = dst[i];
2495     if (c) {
2496       dst[i] += rhs[i] + 1;
2497       c = (dst[i] <= l);
2498     } else {
2499       dst[i] += rhs[i];
2500       c = (dst[i] < l);
2501     }
2502   }
2503 
2504   return c;
2505 }
2506 
2507 /* DST -= RHS + C where C is zero or one.  Returns the carry flag.  */
2508 integerPart
2509 APInt::tcSubtract(integerPart *dst, const integerPart *rhs,
2510                   integerPart c, unsigned int parts)
2511 {
2512   unsigned int i;
2513 
2514   assert(c <= 1);
2515 
2516   for (i = 0; i < parts; i++) {
2517     integerPart l;
2518 
2519     l = dst[i];
2520     if (c) {
2521       dst[i] -= rhs[i] + 1;
2522       c = (dst[i] >= l);
2523     } else {
2524       dst[i] -= rhs[i];
2525       c = (dst[i] > l);
2526     }
2527   }
2528 
2529   return c;
2530 }
2531 
2532 /* Negate a bignum in-place.  */
2533 void
2534 APInt::tcNegate(integerPart *dst, unsigned int parts)
2535 {
2536   tcComplement(dst, parts);
2537   tcIncrement(dst, parts);
2538 }
2539 
2540 /*  DST += SRC * MULTIPLIER + CARRY   if add is true
2541     DST  = SRC * MULTIPLIER + CARRY   if add is false
2542 
2543     Requires 0 <= DSTPARTS <= SRCPARTS + 1.  If DST overlaps SRC
2544     they must start at the same point, i.e. DST == SRC.
2545 
2546     If DSTPARTS == SRCPARTS + 1 no overflow occurs and zero is
2547     returned.  Otherwise DST is filled with the least significant
2548     DSTPARTS parts of the result, and if all of the omitted higher
2549     parts were zero return zero, otherwise overflow occurred and
2550     return one.  */
2551 int
2552 APInt::tcMultiplyPart(integerPart *dst, const integerPart *src,
2553                       integerPart multiplier, integerPart carry,
2554                       unsigned int srcParts, unsigned int dstParts,
2555                       bool add)
2556 {
2557   unsigned int i, n;
2558 
2559   /* Otherwise our writes of DST kill our later reads of SRC.  */
2560   assert(dst <= src || dst >= src + srcParts);
2561   assert(dstParts <= srcParts + 1);
2562 
2563   /* N loops; minimum of dstParts and srcParts.  */
2564   n = dstParts < srcParts ? dstParts: srcParts;
2565 
2566   for (i = 0; i < n; i++) {
2567     integerPart low, mid, high, srcPart;
2568 
2569       /* [ LOW, HIGH ] = MULTIPLIER * SRC[i] + DST[i] + CARRY.
2570 
2571          This cannot overflow, because
2572 
2573          (n - 1) * (n - 1) + 2 (n - 1) = (n - 1) * (n + 1)
2574 
2575          which is less than n^2.  */
2576 
2577     srcPart = src[i];
2578 
2579     if (multiplier == 0 || srcPart == 0)        {
2580       low = carry;
2581       high = 0;
2582     } else {
2583       low = lowHalf(srcPart) * lowHalf(multiplier);
2584       high = highHalf(srcPart) * highHalf(multiplier);
2585 
2586       mid = lowHalf(srcPart) * highHalf(multiplier);
2587       high += highHalf(mid);
2588       mid <<= integerPartWidth / 2;
2589       if (low + mid < low)
2590         high++;
2591       low += mid;
2592 
2593       mid = highHalf(srcPart) * lowHalf(multiplier);
2594       high += highHalf(mid);
2595       mid <<= integerPartWidth / 2;
2596       if (low + mid < low)
2597         high++;
2598       low += mid;
2599 
2600       /* Now add carry.  */
2601       if (low + carry < low)
2602         high++;
2603       low += carry;
2604     }
2605 
2606     if (add) {
2607       /* And now DST[i], and store the new low part there.  */
2608       if (low + dst[i] < low)
2609         high++;
2610       dst[i] += low;
2611     } else
2612       dst[i] = low;
2613 
2614     carry = high;
2615   }
2616 
2617   if (i < dstParts) {
2618     /* Full multiplication, there is no overflow.  */
2619     assert(i + 1 == dstParts);
2620     dst[i] = carry;
2621     return 0;
2622   } else {
2623     /* We overflowed if there is carry.  */
2624     if (carry)
2625       return 1;
2626 
2627     /* We would overflow if any significant unwritten parts would be
2628        non-zero.  This is true if any remaining src parts are non-zero
2629        and the multiplier is non-zero.  */
2630     if (multiplier)
2631       for (; i < srcParts; i++)
2632         if (src[i])
2633           return 1;
2634 
2635     /* We fitted in the narrow destination.  */
2636     return 0;
2637   }
2638 }
2639 
2640 /* DST = LHS * RHS, where DST has the same width as the operands and
2641    is filled with the least significant parts of the result.  Returns
2642    one if overflow occurred, otherwise zero.  DST must be disjoint
2643    from both operands.  */
2644 int
2645 APInt::tcMultiply(integerPart *dst, const integerPart *lhs,
2646                   const integerPart *rhs, unsigned int parts)
2647 {
2648   unsigned int i;
2649   int overflow;
2650 
2651   assert(dst != lhs && dst != rhs);
2652 
2653   overflow = 0;
2654   tcSet(dst, 0, parts);
2655 
2656   for (i = 0; i < parts; i++)
2657     overflow |= tcMultiplyPart(&dst[i], lhs, rhs[i], 0, parts,
2658                                parts - i, true);
2659 
2660   return overflow;
2661 }
2662 
2663 /* DST = LHS * RHS, where DST has width the sum of the widths of the
2664    operands.  No overflow occurs.  DST must be disjoint from both
2665    operands.  Returns the number of parts required to hold the
2666    result.  */
2667 unsigned int
2668 APInt::tcFullMultiply(integerPart *dst, const integerPart *lhs,
2669                       const integerPart *rhs, unsigned int lhsParts,
2670                       unsigned int rhsParts)
2671 {
2672   /* Put the narrower number on the LHS for less loops below.  */
2673   if (lhsParts > rhsParts) {
2674     return tcFullMultiply (dst, rhs, lhs, rhsParts, lhsParts);
2675   } else {
2676     unsigned int n;
2677 
2678     assert(dst != lhs && dst != rhs);
2679 
2680     tcSet(dst, 0, rhsParts);
2681 
2682     for (n = 0; n < lhsParts; n++)
2683       tcMultiplyPart(&dst[n], rhs, lhs[n], 0, rhsParts, rhsParts + 1, true);
2684 
2685     n = lhsParts + rhsParts;
2686 
2687     return n - (dst[n - 1] == 0);
2688   }
2689 }
2690 
2691 /* If RHS is zero LHS and REMAINDER are left unchanged, return one.
2692    Otherwise set LHS to LHS / RHS with the fractional part discarded,
2693    set REMAINDER to the remainder, return zero.  i.e.
2694 
2695    OLD_LHS = RHS * LHS + REMAINDER
2696 
2697    SCRATCH is a bignum of the same size as the operands and result for
2698    use by the routine; its contents need not be initialized and are
2699    destroyed.  LHS, REMAINDER and SCRATCH must be distinct.
2700 */
2701 int
2702 APInt::tcDivide(integerPart *lhs, const integerPart *rhs,
2703                 integerPart *remainder, integerPart *srhs,
2704                 unsigned int parts)
2705 {
2706   unsigned int n, shiftCount;
2707   integerPart mask;
2708 
2709   assert(lhs != remainder && lhs != srhs && remainder != srhs);
2710 
2711   shiftCount = tcMSB(rhs, parts) + 1;
2712   if (shiftCount == 0)
2713     return true;
2714 
2715   shiftCount = parts * integerPartWidth - shiftCount;
2716   n = shiftCount / integerPartWidth;
2717   mask = (integerPart) 1 << (shiftCount % integerPartWidth);
2718 
2719   tcAssign(srhs, rhs, parts);
2720   tcShiftLeft(srhs, parts, shiftCount);
2721   tcAssign(remainder, lhs, parts);
2722   tcSet(lhs, 0, parts);
2723 
2724   /* Loop, subtracting SRHS if REMAINDER is greater and adding that to
2725      the total.  */
2726   for (;;) {
2727       int compare;
2728 
2729       compare = tcCompare(remainder, srhs, parts);
2730       if (compare >= 0) {
2731         tcSubtract(remainder, srhs, 0, parts);
2732         lhs[n] |= mask;
2733       }
2734 
2735       if (shiftCount == 0)
2736         break;
2737       shiftCount--;
2738       tcShiftRight(srhs, parts, 1);
2739       if ((mask >>= 1) == 0)
2740         mask = (integerPart) 1 << (integerPartWidth - 1), n--;
2741   }
2742 
2743   return false;
2744 }
2745 
2746 /* Shift a bignum left COUNT bits in-place.  Shifted in bits are zero.
2747    There are no restrictions on COUNT.  */
2748 void
2749 APInt::tcShiftLeft(integerPart *dst, unsigned int parts, unsigned int count)
2750 {
2751   if (count) {
2752     unsigned int jump, shift;
2753 
2754     /* Jump is the inter-part jump; shift is is intra-part shift.  */
2755     jump = count / integerPartWidth;
2756     shift = count % integerPartWidth;
2757 
2758     while (parts > jump) {
2759       integerPart part;
2760 
2761       parts--;
2762 
2763       /* dst[i] comes from the two parts src[i - jump] and, if we have
2764          an intra-part shift, src[i - jump - 1].  */
2765       part = dst[parts - jump];
2766       if (shift) {
2767         part <<= shift;
2768         if (parts >= jump + 1)
2769           part |= dst[parts - jump - 1] >> (integerPartWidth - shift);
2770       }
2771 
2772       dst[parts] = part;
2773     }
2774 
2775     while (parts > 0)
2776       dst[--parts] = 0;
2777   }
2778 }
2779 
2780 /* Shift a bignum right COUNT bits in-place.  Shifted in bits are
2781    zero.  There are no restrictions on COUNT.  */
2782 void
2783 APInt::tcShiftRight(integerPart *dst, unsigned int parts, unsigned int count)
2784 {
2785   if (count) {
2786     unsigned int i, jump, shift;
2787 
2788     /* Jump is the inter-part jump; shift is is intra-part shift.  */
2789     jump = count / integerPartWidth;
2790     shift = count % integerPartWidth;
2791 
2792     /* Perform the shift.  This leaves the most significant COUNT bits
2793        of the result at zero.  */
2794     for (i = 0; i < parts; i++) {
2795       integerPart part;
2796 
2797       if (i + jump >= parts) {
2798         part = 0;
2799       } else {
2800         part = dst[i + jump];
2801         if (shift) {
2802           part >>= shift;
2803           if (i + jump + 1 < parts)
2804             part |= dst[i + jump + 1] << (integerPartWidth - shift);
2805         }
2806       }
2807 
2808       dst[i] = part;
2809     }
2810   }
2811 }
2812 
2813 /* Bitwise and of two bignums.  */
2814 void
2815 APInt::tcAnd(integerPart *dst, const integerPart *rhs, unsigned int parts)
2816 {
2817   unsigned int i;
2818 
2819   for (i = 0; i < parts; i++)
2820     dst[i] &= rhs[i];
2821 }
2822 
2823 /* Bitwise inclusive or of two bignums.  */
2824 void
2825 APInt::tcOr(integerPart *dst, const integerPart *rhs, unsigned int parts)
2826 {
2827   unsigned int i;
2828 
2829   for (i = 0; i < parts; i++)
2830     dst[i] |= rhs[i];
2831 }
2832 
2833 /* Bitwise exclusive or of two bignums.  */
2834 void
2835 APInt::tcXor(integerPart *dst, const integerPart *rhs, unsigned int parts)
2836 {
2837   unsigned int i;
2838 
2839   for (i = 0; i < parts; i++)
2840     dst[i] ^= rhs[i];
2841 }
2842 
2843 /* Complement a bignum in-place.  */
2844 void
2845 APInt::tcComplement(integerPart *dst, unsigned int parts)
2846 {
2847   unsigned int i;
2848 
2849   for (i = 0; i < parts; i++)
2850     dst[i] = ~dst[i];
2851 }
2852 
2853 /* Comparison (unsigned) of two bignums.  */
2854 int
2855 APInt::tcCompare(const integerPart *lhs, const integerPart *rhs,
2856                  unsigned int parts)
2857 {
2858   while (parts) {
2859       parts--;
2860       if (lhs[parts] == rhs[parts])
2861         continue;
2862 
2863       if (lhs[parts] > rhs[parts])
2864         return 1;
2865       else
2866         return -1;
2867     }
2868 
2869   return 0;
2870 }
2871 
2872 /* Increment a bignum in-place, return the carry flag.  */
2873 integerPart
2874 APInt::tcIncrement(integerPart *dst, unsigned int parts)
2875 {
2876   unsigned int i;
2877 
2878   for (i = 0; i < parts; i++)
2879     if (++dst[i] != 0)
2880       break;
2881 
2882   return i == parts;
2883 }
2884 
2885 /* Set the least significant BITS bits of a bignum, clear the
2886    rest.  */
2887 void
2888 APInt::tcSetLeastSignificantBits(integerPart *dst, unsigned int parts,
2889                                  unsigned int bits)
2890 {
2891   unsigned int i;
2892 
2893   i = 0;
2894   while (bits > integerPartWidth) {
2895     dst[i++] = ~(integerPart) 0;
2896     bits -= integerPartWidth;
2897   }
2898 
2899   if (bits)
2900     dst[i++] = ~(integerPart) 0 >> (integerPartWidth - bits);
2901 
2902   while (i < parts)
2903     dst[i++] = 0;
2904 }
2905