1 //===-- APInt.cpp - Implement APInt class ---------------------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements a class to represent arbitrary precision integer
11 // constant values and provide a variety of arithmetic operations on them.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "llvm/ADT/APInt.h"
16 #include "llvm/ADT/ArrayRef.h"
17 #include "llvm/ADT/FoldingSet.h"
18 #include "llvm/ADT/Hashing.h"
19 #include "llvm/ADT/SmallString.h"
20 #include "llvm/ADT/StringRef.h"
21 #include "llvm/Support/Debug.h"
22 #include "llvm/Support/ErrorHandling.h"
23 #include "llvm/Support/MathExtras.h"
24 #include "llvm/Support/raw_ostream.h"
25 #include <climits>
26 #include <cmath>
27 #include <cstdlib>
28 #include <cstring>
29 using namespace llvm;
30 
31 #define DEBUG_TYPE "apint"
32 
33 /// A utility function for allocating memory, checking for allocation failures,
34 /// and ensuring the contents are zeroed.
35 inline static uint64_t* getClearedMemory(unsigned numWords) {
36   uint64_t * result = new uint64_t[numWords];
37   assert(result && "APInt memory allocation fails!");
38   memset(result, 0, numWords * sizeof(uint64_t));
39   return result;
40 }
41 
42 /// A utility function for allocating memory and checking for allocation
43 /// failure.  The content is not zeroed.
44 inline static uint64_t* getMemory(unsigned numWords) {
45   uint64_t * result = new uint64_t[numWords];
46   assert(result && "APInt memory allocation fails!");
47   return result;
48 }
49 
50 /// A utility function that converts a character to a digit.
51 inline static unsigned getDigit(char cdigit, uint8_t radix) {
52   unsigned r;
53 
54   if (radix == 16 || radix == 36) {
55     r = cdigit - '0';
56     if (r <= 9)
57       return r;
58 
59     r = cdigit - 'A';
60     if (r <= radix - 11U)
61       return r + 10;
62 
63     r = cdigit - 'a';
64     if (r <= radix - 11U)
65       return r + 10;
66 
67     radix = 10;
68   }
69 
70   r = cdigit - '0';
71   if (r < radix)
72     return r;
73 
74   return -1U;
75 }
76 
77 
78 void APInt::initSlowCase(uint64_t val, bool isSigned) {
79   U.pVal = getClearedMemory(getNumWords());
80   U.pVal[0] = val;
81   if (isSigned && int64_t(val) < 0)
82     for (unsigned i = 1; i < getNumWords(); ++i)
83       U.pVal[i] = WORD_MAX;
84   clearUnusedBits();
85 }
86 
87 void APInt::initSlowCase(const APInt& that) {
88   U.pVal = getMemory(getNumWords());
89   memcpy(U.pVal, that.U.pVal, getNumWords() * APINT_WORD_SIZE);
90 }
91 
92 void APInt::initFromArray(ArrayRef<uint64_t> bigVal) {
93   assert(BitWidth && "Bitwidth too small");
94   assert(bigVal.data() && "Null pointer detected!");
95   if (isSingleWord())
96     U.VAL = bigVal[0];
97   else {
98     // Get memory, cleared to 0
99     U.pVal = getClearedMemory(getNumWords());
100     // Calculate the number of words to copy
101     unsigned words = std::min<unsigned>(bigVal.size(), getNumWords());
102     // Copy the words from bigVal to pVal
103     memcpy(U.pVal, bigVal.data(), words * APINT_WORD_SIZE);
104   }
105   // Make sure unused high bits are cleared
106   clearUnusedBits();
107 }
108 
109 APInt::APInt(unsigned numBits, ArrayRef<uint64_t> bigVal)
110   : BitWidth(numBits) {
111   initFromArray(bigVal);
112 }
113 
114 APInt::APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[])
115   : BitWidth(numBits) {
116   initFromArray(makeArrayRef(bigVal, numWords));
117 }
118 
119 APInt::APInt(unsigned numbits, StringRef Str, uint8_t radix)
120   : BitWidth(numbits) {
121   assert(BitWidth && "Bitwidth too small");
122   fromString(numbits, Str, radix);
123 }
124 
125 void APInt::AssignSlowCase(const APInt& RHS) {
126   // Don't do anything for X = X
127   if (this == &RHS)
128     return;
129 
130   if (BitWidth == RHS.getBitWidth()) {
131     // assume same bit-width single-word case is already handled
132     assert(!isSingleWord());
133     memcpy(U.pVal, RHS.U.pVal, getNumWords() * APINT_WORD_SIZE);
134     return;
135   }
136 
137   if (isSingleWord()) {
138     // assume case where both are single words is already handled
139     assert(!RHS.isSingleWord());
140     U.pVal = getMemory(RHS.getNumWords());
141     memcpy(U.pVal, RHS.U.pVal, RHS.getNumWords() * APINT_WORD_SIZE);
142   } else if (getNumWords() == RHS.getNumWords())
143     memcpy(U.pVal, RHS.U.pVal, RHS.getNumWords() * APINT_WORD_SIZE);
144   else if (RHS.isSingleWord()) {
145     delete [] U.pVal;
146     U.VAL = RHS.U.VAL;
147   } else {
148     delete [] U.pVal;
149     U.pVal = getMemory(RHS.getNumWords());
150     memcpy(U.pVal, RHS.U.pVal, RHS.getNumWords() * APINT_WORD_SIZE);
151   }
152   BitWidth = RHS.BitWidth;
153   clearUnusedBits();
154 }
155 
156 /// This method 'profiles' an APInt for use with FoldingSet.
157 void APInt::Profile(FoldingSetNodeID& ID) const {
158   ID.AddInteger(BitWidth);
159 
160   if (isSingleWord()) {
161     ID.AddInteger(U.VAL);
162     return;
163   }
164 
165   unsigned NumWords = getNumWords();
166   for (unsigned i = 0; i < NumWords; ++i)
167     ID.AddInteger(U.pVal[i]);
168 }
169 
170 /// @brief Prefix increment operator. Increments the APInt by one.
171 APInt& APInt::operator++() {
172   if (isSingleWord())
173     ++U.VAL;
174   else
175     tcIncrement(U.pVal, getNumWords());
176   return clearUnusedBits();
177 }
178 
179 /// @brief Prefix decrement operator. Decrements the APInt by one.
180 APInt& APInt::operator--() {
181   if (isSingleWord())
182     --U.VAL;
183   else
184     tcDecrement(U.pVal, getNumWords());
185   return clearUnusedBits();
186 }
187 
188 /// Adds the RHS APint to this APInt.
189 /// @returns this, after addition of RHS.
190 /// @brief Addition assignment operator.
191 APInt& APInt::operator+=(const APInt& RHS) {
192   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
193   if (isSingleWord())
194     U.VAL += RHS.U.VAL;
195   else
196     tcAdd(U.pVal, RHS.U.pVal, 0, getNumWords());
197   return clearUnusedBits();
198 }
199 
200 APInt& APInt::operator+=(uint64_t RHS) {
201   if (isSingleWord())
202     U.VAL += RHS;
203   else
204     tcAddPart(U.pVal, RHS, getNumWords());
205   return clearUnusedBits();
206 }
207 
208 /// Subtracts the RHS APInt from this APInt
209 /// @returns this, after subtraction
210 /// @brief Subtraction assignment operator.
211 APInt& APInt::operator-=(const APInt& RHS) {
212   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
213   if (isSingleWord())
214     U.VAL -= RHS.U.VAL;
215   else
216     tcSubtract(U.pVal, RHS.U.pVal, 0, getNumWords());
217   return clearUnusedBits();
218 }
219 
220 APInt& APInt::operator-=(uint64_t RHS) {
221   if (isSingleWord())
222     U.VAL -= RHS;
223   else
224     tcSubtractPart(U.pVal, RHS, getNumWords());
225   return clearUnusedBits();
226 }
227 
228 APInt APInt::operator*(const APInt& RHS) const {
229   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
230   if (isSingleWord())
231     return APInt(BitWidth, U.VAL * RHS.U.VAL);
232 
233   APInt Result(getMemory(getNumWords()), getBitWidth());
234 
235   tcMultiply(Result.U.pVal, U.pVal, RHS.U.pVal, getNumWords());
236 
237   Result.clearUnusedBits();
238   return Result;
239 }
240 
241 void APInt::AndAssignSlowCase(const APInt& RHS) {
242   tcAnd(U.pVal, RHS.U.pVal, getNumWords());
243 }
244 
245 void APInt::OrAssignSlowCase(const APInt& RHS) {
246   tcOr(U.pVal, RHS.U.pVal, getNumWords());
247 }
248 
249 void APInt::XorAssignSlowCase(const APInt& RHS) {
250   tcXor(U.pVal, RHS.U.pVal, getNumWords());
251 }
252 
253 APInt& APInt::operator*=(const APInt& RHS) {
254   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
255   *this = *this * RHS;
256   return *this;
257 }
258 
259 bool APInt::EqualSlowCase(const APInt& RHS) const {
260   return std::equal(U.pVal, U.pVal + getNumWords(), RHS.U.pVal);
261 }
262 
263 int APInt::compare(const APInt& RHS) const {
264   assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
265   if (isSingleWord())
266     return U.VAL < RHS.U.VAL ? -1 : U.VAL > RHS.U.VAL;
267 
268   return tcCompare(U.pVal, RHS.U.pVal, getNumWords());
269 }
270 
271 int APInt::compareSigned(const APInt& RHS) const {
272   assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
273   if (isSingleWord()) {
274     int64_t lhsSext = SignExtend64(U.VAL, BitWidth);
275     int64_t rhsSext = SignExtend64(RHS.U.VAL, BitWidth);
276     return lhsSext < rhsSext ? -1 : lhsSext > rhsSext;
277   }
278 
279   bool lhsNeg = isNegative();
280   bool rhsNeg = RHS.isNegative();
281 
282   // If the sign bits don't match, then (LHS < RHS) if LHS is negative
283   if (lhsNeg != rhsNeg)
284     return lhsNeg ? -1 : 1;
285 
286   // Otherwise we can just use an unsigned comparison, because even negative
287   // numbers compare correctly this way if both have the same signed-ness.
288   return tcCompare(U.pVal, RHS.U.pVal, getNumWords());
289 }
290 
291 void APInt::setBitsSlowCase(unsigned loBit, unsigned hiBit) {
292   unsigned loWord = whichWord(loBit);
293   unsigned hiWord = whichWord(hiBit);
294 
295   // Create an initial mask for the low word with zeros below loBit.
296   uint64_t loMask = WORD_MAX << whichBit(loBit);
297 
298   // If hiBit is not aligned, we need a high mask.
299   unsigned hiShiftAmt = whichBit(hiBit);
300   if (hiShiftAmt != 0) {
301     // Create a high mask with zeros above hiBit.
302     uint64_t hiMask = WORD_MAX >> (APINT_BITS_PER_WORD - hiShiftAmt);
303     // If loWord and hiWord are equal, then we combine the masks. Otherwise,
304     // set the bits in hiWord.
305     if (hiWord == loWord)
306       loMask &= hiMask;
307     else
308       U.pVal[hiWord] |= hiMask;
309   }
310   // Apply the mask to the low word.
311   U.pVal[loWord] |= loMask;
312 
313   // Fill any words between loWord and hiWord with all ones.
314   for (unsigned word = loWord + 1; word < hiWord; ++word)
315     U.pVal[word] = WORD_MAX;
316 }
317 
318 /// @brief Toggle every bit to its opposite value.
319 void APInt::flipAllBitsSlowCase() {
320   tcComplement(U.pVal, getNumWords());
321   clearUnusedBits();
322 }
323 
324 /// Toggle a given bit to its opposite value whose position is given
325 /// as "bitPosition".
326 /// @brief Toggles a given bit to its opposite value.
327 void APInt::flipBit(unsigned bitPosition) {
328   assert(bitPosition < BitWidth && "Out of the bit-width range!");
329   if ((*this)[bitPosition]) clearBit(bitPosition);
330   else setBit(bitPosition);
331 }
332 
333 void APInt::insertBits(const APInt &subBits, unsigned bitPosition) {
334   unsigned subBitWidth = subBits.getBitWidth();
335   assert(0 < subBitWidth && (subBitWidth + bitPosition) <= BitWidth &&
336          "Illegal bit insertion");
337 
338   // Insertion is a direct copy.
339   if (subBitWidth == BitWidth) {
340     *this = subBits;
341     return;
342   }
343 
344   // Single word result can be done as a direct bitmask.
345   if (isSingleWord()) {
346     uint64_t mask = WORD_MAX >> (APINT_BITS_PER_WORD - subBitWidth);
347     U.VAL &= ~(mask << bitPosition);
348     U.VAL |= (subBits.U.VAL << bitPosition);
349     return;
350   }
351 
352   unsigned loBit = whichBit(bitPosition);
353   unsigned loWord = whichWord(bitPosition);
354   unsigned hi1Word = whichWord(bitPosition + subBitWidth - 1);
355 
356   // Insertion within a single word can be done as a direct bitmask.
357   if (loWord == hi1Word) {
358     uint64_t mask = WORD_MAX >> (APINT_BITS_PER_WORD - subBitWidth);
359     U.pVal[loWord] &= ~(mask << loBit);
360     U.pVal[loWord] |= (subBits.U.VAL << loBit);
361     return;
362   }
363 
364   // Insert on word boundaries.
365   if (loBit == 0) {
366     // Direct copy whole words.
367     unsigned numWholeSubWords = subBitWidth / APINT_BITS_PER_WORD;
368     memcpy(U.pVal + loWord, subBits.getRawData(),
369            numWholeSubWords * APINT_WORD_SIZE);
370 
371     // Mask+insert remaining bits.
372     unsigned remainingBits = subBitWidth % APINT_BITS_PER_WORD;
373     if (remainingBits != 0) {
374       uint64_t mask = WORD_MAX >> (APINT_BITS_PER_WORD - remainingBits);
375       U.pVal[hi1Word] &= ~mask;
376       U.pVal[hi1Word] |= subBits.getWord(subBitWidth - 1);
377     }
378     return;
379   }
380 
381   // General case - set/clear individual bits in dst based on src.
382   // TODO - there is scope for optimization here, but at the moment this code
383   // path is barely used so prefer readability over performance.
384   for (unsigned i = 0; i != subBitWidth; ++i) {
385     if (subBits[i])
386       setBit(bitPosition + i);
387     else
388       clearBit(bitPosition + i);
389   }
390 }
391 
392 APInt APInt::extractBits(unsigned numBits, unsigned bitPosition) const {
393   assert(numBits > 0 && "Can't extract zero bits");
394   assert(bitPosition < BitWidth && (numBits + bitPosition) <= BitWidth &&
395          "Illegal bit extraction");
396 
397   if (isSingleWord())
398     return APInt(numBits, U.VAL >> bitPosition);
399 
400   unsigned loBit = whichBit(bitPosition);
401   unsigned loWord = whichWord(bitPosition);
402   unsigned hiWord = whichWord(bitPosition + numBits - 1);
403 
404   // Single word result extracting bits from a single word source.
405   if (loWord == hiWord)
406     return APInt(numBits, U.pVal[loWord] >> loBit);
407 
408   // Extracting bits that start on a source word boundary can be done
409   // as a fast memory copy.
410   if (loBit == 0)
411     return APInt(numBits, makeArrayRef(U.pVal + loWord, 1 + hiWord - loWord));
412 
413   // General case - shift + copy source words directly into place.
414   APInt Result(numBits, 0);
415   unsigned NumSrcWords = getNumWords();
416   unsigned NumDstWords = Result.getNumWords();
417 
418   for (unsigned word = 0; word < NumDstWords; ++word) {
419     uint64_t w0 = U.pVal[loWord + word];
420     uint64_t w1 =
421         (loWord + word + 1) < NumSrcWords ? U.pVal[loWord + word + 1] : 0;
422     Result.U.pVal[word] = (w0 >> loBit) | (w1 << (APINT_BITS_PER_WORD - loBit));
423   }
424 
425   return Result.clearUnusedBits();
426 }
427 
428 unsigned APInt::getBitsNeeded(StringRef str, uint8_t radix) {
429   assert(!str.empty() && "Invalid string length");
430   assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 ||
431           radix == 36) &&
432          "Radix should be 2, 8, 10, 16, or 36!");
433 
434   size_t slen = str.size();
435 
436   // Each computation below needs to know if it's negative.
437   StringRef::iterator p = str.begin();
438   unsigned isNegative = *p == '-';
439   if (*p == '-' || *p == '+') {
440     p++;
441     slen--;
442     assert(slen && "String is only a sign, needs a value.");
443   }
444 
445   // For radixes of power-of-two values, the bits required is accurately and
446   // easily computed
447   if (radix == 2)
448     return slen + isNegative;
449   if (radix == 8)
450     return slen * 3 + isNegative;
451   if (radix == 16)
452     return slen * 4 + isNegative;
453 
454   // FIXME: base 36
455 
456   // This is grossly inefficient but accurate. We could probably do something
457   // with a computation of roughly slen*64/20 and then adjust by the value of
458   // the first few digits. But, I'm not sure how accurate that could be.
459 
460   // Compute a sufficient number of bits that is always large enough but might
461   // be too large. This avoids the assertion in the constructor. This
462   // calculation doesn't work appropriately for the numbers 0-9, so just use 4
463   // bits in that case.
464   unsigned sufficient
465     = radix == 10? (slen == 1 ? 4 : slen * 64/18)
466                  : (slen == 1 ? 7 : slen * 16/3);
467 
468   // Convert to the actual binary value.
469   APInt tmp(sufficient, StringRef(p, slen), radix);
470 
471   // Compute how many bits are required. If the log is infinite, assume we need
472   // just bit.
473   unsigned log = tmp.logBase2();
474   if (log == (unsigned)-1) {
475     return isNegative + 1;
476   } else {
477     return isNegative + log + 1;
478   }
479 }
480 
481 hash_code llvm::hash_value(const APInt &Arg) {
482   if (Arg.isSingleWord())
483     return hash_combine(Arg.U.VAL);
484 
485   return hash_combine_range(Arg.U.pVal, Arg.U.pVal + Arg.getNumWords());
486 }
487 
488 bool APInt::isSplat(unsigned SplatSizeInBits) const {
489   assert(getBitWidth() % SplatSizeInBits == 0 &&
490          "SplatSizeInBits must divide width!");
491   // We can check that all parts of an integer are equal by making use of a
492   // little trick: rotate and check if it's still the same value.
493   return *this == rotl(SplatSizeInBits);
494 }
495 
496 /// This function returns the high "numBits" bits of this APInt.
497 APInt APInt::getHiBits(unsigned numBits) const {
498   return this->lshr(BitWidth - numBits);
499 }
500 
501 /// This function returns the low "numBits" bits of this APInt.
502 APInt APInt::getLoBits(unsigned numBits) const {
503   APInt Result(getLowBitsSet(BitWidth, numBits));
504   Result &= *this;
505   return Result;
506 }
507 
508 /// Return a value containing V broadcasted over NewLen bits.
509 APInt APInt::getSplat(unsigned NewLen, const APInt &V) {
510   assert(NewLen >= V.getBitWidth() && "Can't splat to smaller bit width!");
511 
512   APInt Val = V.zextOrSelf(NewLen);
513   for (unsigned I = V.getBitWidth(); I < NewLen; I <<= 1)
514     Val |= Val << I;
515 
516   return Val;
517 }
518 
519 unsigned APInt::countLeadingZerosSlowCase() const {
520   unsigned Count = 0;
521   for (int i = getNumWords()-1; i >= 0; --i) {
522     uint64_t V = U.pVal[i];
523     if (V == 0)
524       Count += APINT_BITS_PER_WORD;
525     else {
526       Count += llvm::countLeadingZeros(V);
527       break;
528     }
529   }
530   // Adjust for unused bits in the most significant word (they are zero).
531   unsigned Mod = BitWidth % APINT_BITS_PER_WORD;
532   Count -= Mod > 0 ? APINT_BITS_PER_WORD - Mod : 0;
533   return Count;
534 }
535 
536 unsigned APInt::countLeadingOnes() const {
537   if (isSingleWord())
538     return llvm::countLeadingOnes(U.VAL << (APINT_BITS_PER_WORD - BitWidth));
539 
540   unsigned highWordBits = BitWidth % APINT_BITS_PER_WORD;
541   unsigned shift;
542   if (!highWordBits) {
543     highWordBits = APINT_BITS_PER_WORD;
544     shift = 0;
545   } else {
546     shift = APINT_BITS_PER_WORD - highWordBits;
547   }
548   int i = getNumWords() - 1;
549   unsigned Count = llvm::countLeadingOnes(U.pVal[i] << shift);
550   if (Count == highWordBits) {
551     for (i--; i >= 0; --i) {
552       if (U.pVal[i] == WORD_MAX)
553         Count += APINT_BITS_PER_WORD;
554       else {
555         Count += llvm::countLeadingOnes(U.pVal[i]);
556         break;
557       }
558     }
559   }
560   return Count;
561 }
562 
563 unsigned APInt::countTrailingZeros() const {
564   if (isSingleWord())
565     return std::min(unsigned(llvm::countTrailingZeros(U.VAL)), BitWidth);
566   unsigned Count = 0;
567   unsigned i = 0;
568   for (; i < getNumWords() && U.pVal[i] == 0; ++i)
569     Count += APINT_BITS_PER_WORD;
570   if (i < getNumWords())
571     Count += llvm::countTrailingZeros(U.pVal[i]);
572   return std::min(Count, BitWidth);
573 }
574 
575 unsigned APInt::countTrailingOnesSlowCase() const {
576   unsigned Count = 0;
577   unsigned i = 0;
578   for (; i < getNumWords() && U.pVal[i] == WORD_MAX; ++i)
579     Count += APINT_BITS_PER_WORD;
580   if (i < getNumWords())
581     Count += llvm::countTrailingOnes(U.pVal[i]);
582   assert(Count <= BitWidth);
583   return Count;
584 }
585 
586 unsigned APInt::countPopulationSlowCase() const {
587   unsigned Count = 0;
588   for (unsigned i = 0; i < getNumWords(); ++i)
589     Count += llvm::countPopulation(U.pVal[i]);
590   return Count;
591 }
592 
593 bool APInt::intersectsSlowCase(const APInt &RHS) const {
594   for (unsigned i = 0, e = getNumWords(); i != e; ++i)
595     if ((U.pVal[i] & RHS.U.pVal[i]) != 0)
596       return true;
597 
598   return false;
599 }
600 
601 bool APInt::isSubsetOfSlowCase(const APInt &RHS) const {
602   for (unsigned i = 0, e = getNumWords(); i != e; ++i)
603     if ((U.pVal[i] & ~RHS.U.pVal[i]) != 0)
604       return false;
605 
606   return true;
607 }
608 
609 APInt APInt::byteSwap() const {
610   assert(BitWidth >= 16 && BitWidth % 16 == 0 && "Cannot byteswap!");
611   if (BitWidth == 16)
612     return APInt(BitWidth, ByteSwap_16(uint16_t(U.VAL)));
613   if (BitWidth == 32)
614     return APInt(BitWidth, ByteSwap_32(unsigned(U.VAL)));
615   if (BitWidth == 48) {
616     unsigned Tmp1 = unsigned(U.VAL >> 16);
617     Tmp1 = ByteSwap_32(Tmp1);
618     uint16_t Tmp2 = uint16_t(U.VAL);
619     Tmp2 = ByteSwap_16(Tmp2);
620     return APInt(BitWidth, (uint64_t(Tmp2) << 32) | Tmp1);
621   }
622   if (BitWidth == 64)
623     return APInt(BitWidth, ByteSwap_64(U.VAL));
624 
625   APInt Result(getNumWords() * APINT_BITS_PER_WORD, 0);
626   for (unsigned I = 0, N = getNumWords(); I != N; ++I)
627     Result.U.pVal[I] = ByteSwap_64(U.pVal[N - I - 1]);
628   if (Result.BitWidth != BitWidth) {
629     Result.lshrInPlace(Result.BitWidth - BitWidth);
630     Result.BitWidth = BitWidth;
631   }
632   return Result;
633 }
634 
635 APInt APInt::reverseBits() const {
636   switch (BitWidth) {
637   case 64:
638     return APInt(BitWidth, llvm::reverseBits<uint64_t>(U.VAL));
639   case 32:
640     return APInt(BitWidth, llvm::reverseBits<uint32_t>(U.VAL));
641   case 16:
642     return APInt(BitWidth, llvm::reverseBits<uint16_t>(U.VAL));
643   case 8:
644     return APInt(BitWidth, llvm::reverseBits<uint8_t>(U.VAL));
645   default:
646     break;
647   }
648 
649   APInt Val(*this);
650   APInt Reversed(BitWidth, 0);
651   unsigned S = BitWidth;
652 
653   for (; Val != 0; Val.lshrInPlace(1)) {
654     Reversed <<= 1;
655     Reversed |= Val[0];
656     --S;
657   }
658 
659   Reversed <<= S;
660   return Reversed;
661 }
662 
663 APInt llvm::APIntOps::GreatestCommonDivisor(APInt A, APInt B) {
664   // Fast-path a common case.
665   if (A == B) return A;
666 
667   // Corner cases: if either operand is zero, the other is the gcd.
668   if (!A) return B;
669   if (!B) return A;
670 
671   // Count common powers of 2 and remove all other powers of 2.
672   unsigned Pow2;
673   {
674     unsigned Pow2_A = A.countTrailingZeros();
675     unsigned Pow2_B = B.countTrailingZeros();
676     if (Pow2_A > Pow2_B) {
677       A.lshrInPlace(Pow2_A - Pow2_B);
678       Pow2 = Pow2_B;
679     } else if (Pow2_B > Pow2_A) {
680       B.lshrInPlace(Pow2_B - Pow2_A);
681       Pow2 = Pow2_A;
682     } else {
683       Pow2 = Pow2_A;
684     }
685   }
686 
687   // Both operands are odd multiples of 2^Pow_2:
688   //
689   //   gcd(a, b) = gcd(|a - b| / 2^i, min(a, b))
690   //
691   // This is a modified version of Stein's algorithm, taking advantage of
692   // efficient countTrailingZeros().
693   while (A != B) {
694     if (A.ugt(B)) {
695       A -= B;
696       A.lshrInPlace(A.countTrailingZeros() - Pow2);
697     } else {
698       B -= A;
699       B.lshrInPlace(B.countTrailingZeros() - Pow2);
700     }
701   }
702 
703   return A;
704 }
705 
706 APInt llvm::APIntOps::RoundDoubleToAPInt(double Double, unsigned width) {
707   union {
708     double D;
709     uint64_t I;
710   } T;
711   T.D = Double;
712 
713   // Get the sign bit from the highest order bit
714   bool isNeg = T.I >> 63;
715 
716   // Get the 11-bit exponent and adjust for the 1023 bit bias
717   int64_t exp = ((T.I >> 52) & 0x7ff) - 1023;
718 
719   // If the exponent is negative, the value is < 0 so just return 0.
720   if (exp < 0)
721     return APInt(width, 0u);
722 
723   // Extract the mantissa by clearing the top 12 bits (sign + exponent).
724   uint64_t mantissa = (T.I & (~0ULL >> 12)) | 1ULL << 52;
725 
726   // If the exponent doesn't shift all bits out of the mantissa
727   if (exp < 52)
728     return isNeg ? -APInt(width, mantissa >> (52 - exp)) :
729                     APInt(width, mantissa >> (52 - exp));
730 
731   // If the client didn't provide enough bits for us to shift the mantissa into
732   // then the result is undefined, just return 0
733   if (width <= exp - 52)
734     return APInt(width, 0);
735 
736   // Otherwise, we have to shift the mantissa bits up to the right location
737   APInt Tmp(width, mantissa);
738   Tmp <<= (unsigned)exp - 52;
739   return isNeg ? -Tmp : Tmp;
740 }
741 
742 /// This function converts this APInt to a double.
743 /// The layout for double is as following (IEEE Standard 754):
744 ///  --------------------------------------
745 /// |  Sign    Exponent    Fraction    Bias |
746 /// |-------------------------------------- |
747 /// |  1[63]   11[62-52]   52[51-00]   1023 |
748 ///  --------------------------------------
749 double APInt::roundToDouble(bool isSigned) const {
750 
751   // Handle the simple case where the value is contained in one uint64_t.
752   // It is wrong to optimize getWord(0) to VAL; there might be more than one word.
753   if (isSingleWord() || getActiveBits() <= APINT_BITS_PER_WORD) {
754     if (isSigned) {
755       int64_t sext = SignExtend64(getWord(0), BitWidth);
756       return double(sext);
757     } else
758       return double(getWord(0));
759   }
760 
761   // Determine if the value is negative.
762   bool isNeg = isSigned ? (*this)[BitWidth-1] : false;
763 
764   // Construct the absolute value if we're negative.
765   APInt Tmp(isNeg ? -(*this) : (*this));
766 
767   // Figure out how many bits we're using.
768   unsigned n = Tmp.getActiveBits();
769 
770   // The exponent (without bias normalization) is just the number of bits
771   // we are using. Note that the sign bit is gone since we constructed the
772   // absolute value.
773   uint64_t exp = n;
774 
775   // Return infinity for exponent overflow
776   if (exp > 1023) {
777     if (!isSigned || !isNeg)
778       return std::numeric_limits<double>::infinity();
779     else
780       return -std::numeric_limits<double>::infinity();
781   }
782   exp += 1023; // Increment for 1023 bias
783 
784   // Number of bits in mantissa is 52. To obtain the mantissa value, we must
785   // extract the high 52 bits from the correct words in pVal.
786   uint64_t mantissa;
787   unsigned hiWord = whichWord(n-1);
788   if (hiWord == 0) {
789     mantissa = Tmp.U.pVal[0];
790     if (n > 52)
791       mantissa >>= n - 52; // shift down, we want the top 52 bits.
792   } else {
793     assert(hiWord > 0 && "huh?");
794     uint64_t hibits = Tmp.U.pVal[hiWord] << (52 - n % APINT_BITS_PER_WORD);
795     uint64_t lobits = Tmp.U.pVal[hiWord-1] >> (11 + n % APINT_BITS_PER_WORD);
796     mantissa = hibits | lobits;
797   }
798 
799   // The leading bit of mantissa is implicit, so get rid of it.
800   uint64_t sign = isNeg ? (1ULL << (APINT_BITS_PER_WORD - 1)) : 0;
801   union {
802     double D;
803     uint64_t I;
804   } T;
805   T.I = sign | (exp << 52) | mantissa;
806   return T.D;
807 }
808 
809 // Truncate to new width.
810 APInt APInt::trunc(unsigned width) const {
811   assert(width < BitWidth && "Invalid APInt Truncate request");
812   assert(width && "Can't truncate to 0 bits");
813 
814   if (width <= APINT_BITS_PER_WORD)
815     return APInt(width, getRawData()[0]);
816 
817   APInt Result(getMemory(getNumWords(width)), width);
818 
819   // Copy full words.
820   unsigned i;
821   for (i = 0; i != width / APINT_BITS_PER_WORD; i++)
822     Result.U.pVal[i] = U.pVal[i];
823 
824   // Truncate and copy any partial word.
825   unsigned bits = (0 - width) % APINT_BITS_PER_WORD;
826   if (bits != 0)
827     Result.U.pVal[i] = U.pVal[i] << bits >> bits;
828 
829   return Result;
830 }
831 
832 // Sign extend to a new width.
833 APInt APInt::sext(unsigned Width) const {
834   assert(Width > BitWidth && "Invalid APInt SignExtend request");
835 
836   if (Width <= APINT_BITS_PER_WORD)
837     return APInt(Width, SignExtend64(U.VAL, BitWidth));
838 
839   APInt Result(getMemory(getNumWords(Width)), Width);
840 
841   // Copy words.
842   std::memcpy(Result.U.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE);
843 
844   // Sign extend the last word since there may be unused bits in the input.
845   Result.U.pVal[getNumWords() - 1] =
846       SignExtend64(Result.U.pVal[getNumWords() - 1],
847                    ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1);
848 
849   // Fill with sign bits.
850   std::memset(Result.U.pVal + getNumWords(), isNegative() ? -1 : 0,
851               (Result.getNumWords() - getNumWords()) * APINT_WORD_SIZE);
852   Result.clearUnusedBits();
853   return Result;
854 }
855 
856 //  Zero extend to a new width.
857 APInt APInt::zext(unsigned width) const {
858   assert(width > BitWidth && "Invalid APInt ZeroExtend request");
859 
860   if (width <= APINT_BITS_PER_WORD)
861     return APInt(width, U.VAL);
862 
863   APInt Result(getMemory(getNumWords(width)), width);
864 
865   // Copy words.
866   std::memcpy(Result.U.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE);
867 
868   // Zero remaining words.
869   std::memset(Result.U.pVal + getNumWords(), 0,
870               (Result.getNumWords() - getNumWords()) * APINT_WORD_SIZE);
871 
872   return Result;
873 }
874 
875 APInt APInt::zextOrTrunc(unsigned width) const {
876   if (BitWidth < width)
877     return zext(width);
878   if (BitWidth > width)
879     return trunc(width);
880   return *this;
881 }
882 
883 APInt APInt::sextOrTrunc(unsigned width) const {
884   if (BitWidth < width)
885     return sext(width);
886   if (BitWidth > width)
887     return trunc(width);
888   return *this;
889 }
890 
891 APInt APInt::zextOrSelf(unsigned width) const {
892   if (BitWidth < width)
893     return zext(width);
894   return *this;
895 }
896 
897 APInt APInt::sextOrSelf(unsigned width) const {
898   if (BitWidth < width)
899     return sext(width);
900   return *this;
901 }
902 
903 /// Arithmetic right-shift this APInt by shiftAmt.
904 /// @brief Arithmetic right-shift function.
905 void APInt::ashrInPlace(const APInt &shiftAmt) {
906   ashrInPlace((unsigned)shiftAmt.getLimitedValue(BitWidth));
907 }
908 
909 /// Arithmetic right-shift this APInt by shiftAmt.
910 /// @brief Arithmetic right-shift function.
911 void APInt::ashrSlowCase(unsigned ShiftAmt) {
912   // Don't bother performing a no-op shift.
913   if (!ShiftAmt)
914     return;
915 
916   // Save the original sign bit for later.
917   bool Negative = isNegative();
918 
919   // WordShift is the inter-part shift; BitShift is is intra-part shift.
920   unsigned WordShift = ShiftAmt / APINT_BITS_PER_WORD;
921   unsigned BitShift = ShiftAmt % APINT_BITS_PER_WORD;
922 
923   unsigned WordsToMove = getNumWords() - WordShift;
924   if (WordsToMove != 0) {
925     // Sign extend the last word to fill in the unused bits.
926     U.pVal[getNumWords() - 1] = SignExtend64(
927         U.pVal[getNumWords() - 1], ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1);
928 
929     // Fastpath for moving by whole words.
930     if (BitShift == 0) {
931       std::memmove(U.pVal, U.pVal + WordShift, WordsToMove * APINT_WORD_SIZE);
932     } else {
933       // Move the words containing significant bits.
934       for (unsigned i = 0; i != WordsToMove - 1; ++i)
935         U.pVal[i] = (U.pVal[i + WordShift] >> BitShift) |
936                     (U.pVal[i + WordShift + 1] << (APINT_BITS_PER_WORD - BitShift));
937 
938       // Handle the last word which has no high bits to copy.
939       U.pVal[WordsToMove - 1] = U.pVal[WordShift + WordsToMove - 1] >> BitShift;
940       // Sign extend one more time.
941       U.pVal[WordsToMove - 1] =
942           SignExtend64(U.pVal[WordsToMove - 1], APINT_BITS_PER_WORD - BitShift);
943     }
944   }
945 
946   // Fill in the remainder based on the original sign.
947   std::memset(U.pVal + WordsToMove, Negative ? -1 : 0,
948               WordShift * APINT_WORD_SIZE);
949   clearUnusedBits();
950 }
951 
952 /// Logical right-shift this APInt by shiftAmt.
953 /// @brief Logical right-shift function.
954 void APInt::lshrInPlace(const APInt &shiftAmt) {
955   lshrInPlace((unsigned)shiftAmt.getLimitedValue(BitWidth));
956 }
957 
958 /// Logical right-shift this APInt by shiftAmt.
959 /// @brief Logical right-shift function.
960 void APInt::lshrSlowCase(unsigned ShiftAmt) {
961   tcShiftRight(U.pVal, getNumWords(), ShiftAmt);
962 }
963 
964 /// Left-shift this APInt by shiftAmt.
965 /// @brief Left-shift function.
966 APInt &APInt::operator<<=(const APInt &shiftAmt) {
967   // It's undefined behavior in C to shift by BitWidth or greater.
968   *this <<= (unsigned)shiftAmt.getLimitedValue(BitWidth);
969   return *this;
970 }
971 
972 void APInt::shlSlowCase(unsigned ShiftAmt) {
973   tcShiftLeft(U.pVal, getNumWords(), ShiftAmt);
974   clearUnusedBits();
975 }
976 
977 // Calculate the rotate amount modulo the bit width.
978 static unsigned rotateModulo(unsigned BitWidth, const APInt &rotateAmt) {
979   unsigned rotBitWidth = rotateAmt.getBitWidth();
980   APInt rot = rotateAmt;
981   if (rotBitWidth < BitWidth) {
982     // Extend the rotate APInt, so that the urem doesn't divide by 0.
983     // e.g. APInt(1, 32) would give APInt(1, 0).
984     rot = rotateAmt.zext(BitWidth);
985   }
986   rot = rot.urem(APInt(rot.getBitWidth(), BitWidth));
987   return rot.getLimitedValue(BitWidth);
988 }
989 
990 APInt APInt::rotl(const APInt &rotateAmt) const {
991   return rotl(rotateModulo(BitWidth, rotateAmt));
992 }
993 
994 APInt APInt::rotl(unsigned rotateAmt) const {
995   rotateAmt %= BitWidth;
996   if (rotateAmt == 0)
997     return *this;
998   return shl(rotateAmt) | lshr(BitWidth - rotateAmt);
999 }
1000 
1001 APInt APInt::rotr(const APInt &rotateAmt) const {
1002   return rotr(rotateModulo(BitWidth, rotateAmt));
1003 }
1004 
1005 APInt APInt::rotr(unsigned rotateAmt) const {
1006   rotateAmt %= BitWidth;
1007   if (rotateAmt == 0)
1008     return *this;
1009   return lshr(rotateAmt) | shl(BitWidth - rotateAmt);
1010 }
1011 
1012 // Square Root - this method computes and returns the square root of "this".
1013 // Three mechanisms are used for computation. For small values (<= 5 bits),
1014 // a table lookup is done. This gets some performance for common cases. For
1015 // values using less than 52 bits, the value is converted to double and then
1016 // the libc sqrt function is called. The result is rounded and then converted
1017 // back to a uint64_t which is then used to construct the result. Finally,
1018 // the Babylonian method for computing square roots is used.
1019 APInt APInt::sqrt() const {
1020 
1021   // Determine the magnitude of the value.
1022   unsigned magnitude = getActiveBits();
1023 
1024   // Use a fast table for some small values. This also gets rid of some
1025   // rounding errors in libc sqrt for small values.
1026   if (magnitude <= 5) {
1027     static const uint8_t results[32] = {
1028       /*     0 */ 0,
1029       /*  1- 2 */ 1, 1,
1030       /*  3- 6 */ 2, 2, 2, 2,
1031       /*  7-12 */ 3, 3, 3, 3, 3, 3,
1032       /* 13-20 */ 4, 4, 4, 4, 4, 4, 4, 4,
1033       /* 21-30 */ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1034       /*    31 */ 6
1035     };
1036     return APInt(BitWidth, results[ (isSingleWord() ? U.VAL : U.pVal[0]) ]);
1037   }
1038 
1039   // If the magnitude of the value fits in less than 52 bits (the precision of
1040   // an IEEE double precision floating point value), then we can use the
1041   // libc sqrt function which will probably use a hardware sqrt computation.
1042   // This should be faster than the algorithm below.
1043   if (magnitude < 52) {
1044     return APInt(BitWidth,
1045                  uint64_t(::round(::sqrt(double(isSingleWord() ? U.VAL
1046                                                                : U.pVal[0])))));
1047   }
1048 
1049   // Okay, all the short cuts are exhausted. We must compute it. The following
1050   // is a classical Babylonian method for computing the square root. This code
1051   // was adapted to APInt from a wikipedia article on such computations.
1052   // See http://www.wikipedia.org/ and go to the page named
1053   // Calculate_an_integer_square_root.
1054   unsigned nbits = BitWidth, i = 4;
1055   APInt testy(BitWidth, 16);
1056   APInt x_old(BitWidth, 1);
1057   APInt x_new(BitWidth, 0);
1058   APInt two(BitWidth, 2);
1059 
1060   // Select a good starting value using binary logarithms.
1061   for (;; i += 2, testy = testy.shl(2))
1062     if (i >= nbits || this->ule(testy)) {
1063       x_old = x_old.shl(i / 2);
1064       break;
1065     }
1066 
1067   // Use the Babylonian method to arrive at the integer square root:
1068   for (;;) {
1069     x_new = (this->udiv(x_old) + x_old).udiv(two);
1070     if (x_old.ule(x_new))
1071       break;
1072     x_old = x_new;
1073   }
1074 
1075   // Make sure we return the closest approximation
1076   // NOTE: The rounding calculation below is correct. It will produce an
1077   // off-by-one discrepancy with results from pari/gp. That discrepancy has been
1078   // determined to be a rounding issue with pari/gp as it begins to use a
1079   // floating point representation after 192 bits. There are no discrepancies
1080   // between this algorithm and pari/gp for bit widths < 192 bits.
1081   APInt square(x_old * x_old);
1082   APInt nextSquare((x_old + 1) * (x_old +1));
1083   if (this->ult(square))
1084     return x_old;
1085   assert(this->ule(nextSquare) && "Error in APInt::sqrt computation");
1086   APInt midpoint((nextSquare - square).udiv(two));
1087   APInt offset(*this - square);
1088   if (offset.ult(midpoint))
1089     return x_old;
1090   return x_old + 1;
1091 }
1092 
1093 /// Computes the multiplicative inverse of this APInt for a given modulo. The
1094 /// iterative extended Euclidean algorithm is used to solve for this value,
1095 /// however we simplify it to speed up calculating only the inverse, and take
1096 /// advantage of div+rem calculations. We also use some tricks to avoid copying
1097 /// (potentially large) APInts around.
1098 APInt APInt::multiplicativeInverse(const APInt& modulo) const {
1099   assert(ult(modulo) && "This APInt must be smaller than the modulo");
1100 
1101   // Using the properties listed at the following web page (accessed 06/21/08):
1102   //   http://www.numbertheory.org/php/euclid.html
1103   // (especially the properties numbered 3, 4 and 9) it can be proved that
1104   // BitWidth bits suffice for all the computations in the algorithm implemented
1105   // below. More precisely, this number of bits suffice if the multiplicative
1106   // inverse exists, but may not suffice for the general extended Euclidean
1107   // algorithm.
1108 
1109   APInt r[2] = { modulo, *this };
1110   APInt t[2] = { APInt(BitWidth, 0), APInt(BitWidth, 1) };
1111   APInt q(BitWidth, 0);
1112 
1113   unsigned i;
1114   for (i = 0; r[i^1] != 0; i ^= 1) {
1115     // An overview of the math without the confusing bit-flipping:
1116     // q = r[i-2] / r[i-1]
1117     // r[i] = r[i-2] % r[i-1]
1118     // t[i] = t[i-2] - t[i-1] * q
1119     udivrem(r[i], r[i^1], q, r[i]);
1120     t[i] -= t[i^1] * q;
1121   }
1122 
1123   // If this APInt and the modulo are not coprime, there is no multiplicative
1124   // inverse, so return 0. We check this by looking at the next-to-last
1125   // remainder, which is the gcd(*this,modulo) as calculated by the Euclidean
1126   // algorithm.
1127   if (r[i] != 1)
1128     return APInt(BitWidth, 0);
1129 
1130   // The next-to-last t is the multiplicative inverse.  However, we are
1131   // interested in a positive inverse. Calcuate a positive one from a negative
1132   // one if necessary. A simple addition of the modulo suffices because
1133   // abs(t[i]) is known to be less than *this/2 (see the link above).
1134   return t[i].isNegative() ? t[i] + modulo : t[i];
1135 }
1136 
1137 /// Calculate the magic numbers required to implement a signed integer division
1138 /// by a constant as a sequence of multiplies, adds and shifts.  Requires that
1139 /// the divisor not be 0, 1, or -1.  Taken from "Hacker's Delight", Henry S.
1140 /// Warren, Jr., chapter 10.
1141 APInt::ms APInt::magic() const {
1142   const APInt& d = *this;
1143   unsigned p;
1144   APInt ad, anc, delta, q1, r1, q2, r2, t;
1145   APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
1146   struct ms mag;
1147 
1148   ad = d.abs();
1149   t = signedMin + (d.lshr(d.getBitWidth() - 1));
1150   anc = t - 1 - t.urem(ad);   // absolute value of nc
1151   p = d.getBitWidth() - 1;    // initialize p
1152   q1 = signedMin.udiv(anc);   // initialize q1 = 2p/abs(nc)
1153   r1 = signedMin - q1*anc;    // initialize r1 = rem(2p,abs(nc))
1154   q2 = signedMin.udiv(ad);    // initialize q2 = 2p/abs(d)
1155   r2 = signedMin - q2*ad;     // initialize r2 = rem(2p,abs(d))
1156   do {
1157     p = p + 1;
1158     q1 = q1<<1;          // update q1 = 2p/abs(nc)
1159     r1 = r1<<1;          // update r1 = rem(2p/abs(nc))
1160     if (r1.uge(anc)) {  // must be unsigned comparison
1161       q1 = q1 + 1;
1162       r1 = r1 - anc;
1163     }
1164     q2 = q2<<1;          // update q2 = 2p/abs(d)
1165     r2 = r2<<1;          // update r2 = rem(2p/abs(d))
1166     if (r2.uge(ad)) {   // must be unsigned comparison
1167       q2 = q2 + 1;
1168       r2 = r2 - ad;
1169     }
1170     delta = ad - r2;
1171   } while (q1.ult(delta) || (q1 == delta && r1 == 0));
1172 
1173   mag.m = q2 + 1;
1174   if (d.isNegative()) mag.m = -mag.m;   // resulting magic number
1175   mag.s = p - d.getBitWidth();          // resulting shift
1176   return mag;
1177 }
1178 
1179 /// Calculate the magic numbers required to implement an unsigned integer
1180 /// division by a constant as a sequence of multiplies, adds and shifts.
1181 /// Requires that the divisor not be 0.  Taken from "Hacker's Delight", Henry
1182 /// S. Warren, Jr., chapter 10.
1183 /// LeadingZeros can be used to simplify the calculation if the upper bits
1184 /// of the divided value are known zero.
1185 APInt::mu APInt::magicu(unsigned LeadingZeros) const {
1186   const APInt& d = *this;
1187   unsigned p;
1188   APInt nc, delta, q1, r1, q2, r2;
1189   struct mu magu;
1190   magu.a = 0;               // initialize "add" indicator
1191   APInt allOnes = APInt::getAllOnesValue(d.getBitWidth()).lshr(LeadingZeros);
1192   APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
1193   APInt signedMax = APInt::getSignedMaxValue(d.getBitWidth());
1194 
1195   nc = allOnes - (allOnes - d).urem(d);
1196   p = d.getBitWidth() - 1;  // initialize p
1197   q1 = signedMin.udiv(nc);  // initialize q1 = 2p/nc
1198   r1 = signedMin - q1*nc;   // initialize r1 = rem(2p,nc)
1199   q2 = signedMax.udiv(d);   // initialize q2 = (2p-1)/d
1200   r2 = signedMax - q2*d;    // initialize r2 = rem((2p-1),d)
1201   do {
1202     p = p + 1;
1203     if (r1.uge(nc - r1)) {
1204       q1 = q1 + q1 + 1;  // update q1
1205       r1 = r1 + r1 - nc; // update r1
1206     }
1207     else {
1208       q1 = q1+q1; // update q1
1209       r1 = r1+r1; // update r1
1210     }
1211     if ((r2 + 1).uge(d - r2)) {
1212       if (q2.uge(signedMax)) magu.a = 1;
1213       q2 = q2+q2 + 1;     // update q2
1214       r2 = r2+r2 + 1 - d; // update r2
1215     }
1216     else {
1217       if (q2.uge(signedMin)) magu.a = 1;
1218       q2 = q2+q2;     // update q2
1219       r2 = r2+r2 + 1; // update r2
1220     }
1221     delta = d - 1 - r2;
1222   } while (p < d.getBitWidth()*2 &&
1223            (q1.ult(delta) || (q1 == delta && r1 == 0)));
1224   magu.m = q2 + 1; // resulting magic number
1225   magu.s = p - d.getBitWidth();  // resulting shift
1226   return magu;
1227 }
1228 
1229 /// Implementation of Knuth's Algorithm D (Division of nonnegative integers)
1230 /// from "Art of Computer Programming, Volume 2", section 4.3.1, p. 272. The
1231 /// variables here have the same names as in the algorithm. Comments explain
1232 /// the algorithm and any deviation from it.
1233 static void KnuthDiv(unsigned *u, unsigned *v, unsigned *q, unsigned* r,
1234                      unsigned m, unsigned n) {
1235   assert(u && "Must provide dividend");
1236   assert(v && "Must provide divisor");
1237   assert(q && "Must provide quotient");
1238   assert(u != v && u != q && v != q && "Must use different memory");
1239   assert(n>1 && "n must be > 1");
1240 
1241   // b denotes the base of the number system. In our case b is 2^32.
1242   const uint64_t b = uint64_t(1) << 32;
1243 
1244   DEBUG(dbgs() << "KnuthDiv: m=" << m << " n=" << n << '\n');
1245   DEBUG(dbgs() << "KnuthDiv: original:");
1246   DEBUG(for (int i = m+n; i >=0; i--) dbgs() << " " << u[i]);
1247   DEBUG(dbgs() << " by");
1248   DEBUG(for (int i = n; i >0; i--) dbgs() << " " << v[i-1]);
1249   DEBUG(dbgs() << '\n');
1250   // D1. [Normalize.] Set d = b / (v[n-1] + 1) and multiply all the digits of
1251   // u and v by d. Note that we have taken Knuth's advice here to use a power
1252   // of 2 value for d such that d * v[n-1] >= b/2 (b is the base). A power of
1253   // 2 allows us to shift instead of multiply and it is easy to determine the
1254   // shift amount from the leading zeros.  We are basically normalizing the u
1255   // and v so that its high bits are shifted to the top of v's range without
1256   // overflow. Note that this can require an extra word in u so that u must
1257   // be of length m+n+1.
1258   unsigned shift = countLeadingZeros(v[n-1]);
1259   unsigned v_carry = 0;
1260   unsigned u_carry = 0;
1261   if (shift) {
1262     for (unsigned i = 0; i < m+n; ++i) {
1263       unsigned u_tmp = u[i] >> (32 - shift);
1264       u[i] = (u[i] << shift) | u_carry;
1265       u_carry = u_tmp;
1266     }
1267     for (unsigned i = 0; i < n; ++i) {
1268       unsigned v_tmp = v[i] >> (32 - shift);
1269       v[i] = (v[i] << shift) | v_carry;
1270       v_carry = v_tmp;
1271     }
1272   }
1273   u[m+n] = u_carry;
1274 
1275   DEBUG(dbgs() << "KnuthDiv:   normal:");
1276   DEBUG(for (int i = m+n; i >=0; i--) dbgs() << " " << u[i]);
1277   DEBUG(dbgs() << " by");
1278   DEBUG(for (int i = n; i >0; i--) dbgs() << " " << v[i-1]);
1279   DEBUG(dbgs() << '\n');
1280 
1281   // D2. [Initialize j.]  Set j to m. This is the loop counter over the places.
1282   int j = m;
1283   do {
1284     DEBUG(dbgs() << "KnuthDiv: quotient digit #" << j << '\n');
1285     // D3. [Calculate q'.].
1286     //     Set qp = (u[j+n]*b + u[j+n-1]) / v[n-1]. (qp=qprime=q')
1287     //     Set rp = (u[j+n]*b + u[j+n-1]) % v[n-1]. (rp=rprime=r')
1288     // Now test if qp == b or qp*v[n-2] > b*rp + u[j+n-2]; if so, decrease
1289     // qp by 1, inrease rp by v[n-1], and repeat this test if rp < b. The test
1290     // on v[n-2] determines at high speed most of the cases in which the trial
1291     // value qp is one too large, and it eliminates all cases where qp is two
1292     // too large.
1293     uint64_t dividend = ((uint64_t(u[j+n]) << 32) + u[j+n-1]);
1294     DEBUG(dbgs() << "KnuthDiv: dividend == " << dividend << '\n');
1295     uint64_t qp = dividend / v[n-1];
1296     uint64_t rp = dividend % v[n-1];
1297     if (qp == b || qp*v[n-2] > b*rp + u[j+n-2]) {
1298       qp--;
1299       rp += v[n-1];
1300       if (rp < b && (qp == b || qp*v[n-2] > b*rp + u[j+n-2]))
1301         qp--;
1302     }
1303     DEBUG(dbgs() << "KnuthDiv: qp == " << qp << ", rp == " << rp << '\n');
1304 
1305     // D4. [Multiply and subtract.] Replace (u[j+n]u[j+n-1]...u[j]) with
1306     // (u[j+n]u[j+n-1]..u[j]) - qp * (v[n-1]...v[1]v[0]). This computation
1307     // consists of a simple multiplication by a one-place number, combined with
1308     // a subtraction.
1309     // The digits (u[j+n]...u[j]) should be kept positive; if the result of
1310     // this step is actually negative, (u[j+n]...u[j]) should be left as the
1311     // true value plus b**(n+1), namely as the b's complement of
1312     // the true value, and a "borrow" to the left should be remembered.
1313     int64_t borrow = 0;
1314     for (unsigned i = 0; i < n; ++i) {
1315       uint64_t p = uint64_t(qp) * uint64_t(v[i]);
1316       int64_t subres = int64_t(u[j+i]) - borrow - (unsigned)p;
1317       u[j+i] = (unsigned)subres;
1318       borrow = (p >> 32) - (subres >> 32);
1319       DEBUG(dbgs() << "KnuthDiv: u[j+i] = " << u[j+i]
1320                    << ", borrow = " << borrow << '\n');
1321     }
1322     bool isNeg = u[j+n] < borrow;
1323     u[j+n] -= (unsigned)borrow;
1324 
1325     DEBUG(dbgs() << "KnuthDiv: after subtraction:");
1326     DEBUG(for (int i = m+n; i >=0; i--) dbgs() << " " << u[i]);
1327     DEBUG(dbgs() << '\n');
1328 
1329     // D5. [Test remainder.] Set q[j] = qp. If the result of step D4 was
1330     // negative, go to step D6; otherwise go on to step D7.
1331     q[j] = (unsigned)qp;
1332     if (isNeg) {
1333       // D6. [Add back]. The probability that this step is necessary is very
1334       // small, on the order of only 2/b. Make sure that test data accounts for
1335       // this possibility. Decrease q[j] by 1
1336       q[j]--;
1337       // and add (0v[n-1]...v[1]v[0]) to (u[j+n]u[j+n-1]...u[j+1]u[j]).
1338       // A carry will occur to the left of u[j+n], and it should be ignored
1339       // since it cancels with the borrow that occurred in D4.
1340       bool carry = false;
1341       for (unsigned i = 0; i < n; i++) {
1342         unsigned limit = std::min(u[j+i],v[i]);
1343         u[j+i] += v[i] + carry;
1344         carry = u[j+i] < limit || (carry && u[j+i] == limit);
1345       }
1346       u[j+n] += carry;
1347     }
1348     DEBUG(dbgs() << "KnuthDiv: after correction:");
1349     DEBUG(for (int i = m+n; i >=0; i--) dbgs() << " " << u[i]);
1350     DEBUG(dbgs() << "\nKnuthDiv: digit result = " << q[j] << '\n');
1351 
1352   // D7. [Loop on j.]  Decrease j by one. Now if j >= 0, go back to D3.
1353   } while (--j >= 0);
1354 
1355   DEBUG(dbgs() << "KnuthDiv: quotient:");
1356   DEBUG(for (int i = m; i >=0; i--) dbgs() <<" " << q[i]);
1357   DEBUG(dbgs() << '\n');
1358 
1359   // D8. [Unnormalize]. Now q[...] is the desired quotient, and the desired
1360   // remainder may be obtained by dividing u[...] by d. If r is non-null we
1361   // compute the remainder (urem uses this).
1362   if (r) {
1363     // The value d is expressed by the "shift" value above since we avoided
1364     // multiplication by d by using a shift left. So, all we have to do is
1365     // shift right here.
1366     if (shift) {
1367       unsigned carry = 0;
1368       DEBUG(dbgs() << "KnuthDiv: remainder:");
1369       for (int i = n-1; i >= 0; i--) {
1370         r[i] = (u[i] >> shift) | carry;
1371         carry = u[i] << (32 - shift);
1372         DEBUG(dbgs() << " " << r[i]);
1373       }
1374     } else {
1375       for (int i = n-1; i >= 0; i--) {
1376         r[i] = u[i];
1377         DEBUG(dbgs() << " " << r[i]);
1378       }
1379     }
1380     DEBUG(dbgs() << '\n');
1381   }
1382   DEBUG(dbgs() << '\n');
1383 }
1384 
1385 void APInt::divide(const APInt &LHS, unsigned lhsWords, const APInt &RHS,
1386                    unsigned rhsWords, APInt *Quotient, APInt *Remainder) {
1387   assert(lhsWords >= rhsWords && "Fractional result");
1388 
1389   // First, compose the values into an array of 32-bit words instead of
1390   // 64-bit words. This is a necessity of both the "short division" algorithm
1391   // and the Knuth "classical algorithm" which requires there to be native
1392   // operations for +, -, and * on an m bit value with an m*2 bit result. We
1393   // can't use 64-bit operands here because we don't have native results of
1394   // 128-bits. Furthermore, casting the 64-bit values to 32-bit values won't
1395   // work on large-endian machines.
1396   uint64_t mask = ~0ull >> (sizeof(unsigned)*CHAR_BIT);
1397   unsigned n = rhsWords * 2;
1398   unsigned m = (lhsWords * 2) - n;
1399 
1400   // Allocate space for the temporary values we need either on the stack, if
1401   // it will fit, or on the heap if it won't.
1402   unsigned SPACE[128];
1403   unsigned *U = nullptr;
1404   unsigned *V = nullptr;
1405   unsigned *Q = nullptr;
1406   unsigned *R = nullptr;
1407   if ((Remainder?4:3)*n+2*m+1 <= 128) {
1408     U = &SPACE[0];
1409     V = &SPACE[m+n+1];
1410     Q = &SPACE[(m+n+1) + n];
1411     if (Remainder)
1412       R = &SPACE[(m+n+1) + n + (m+n)];
1413   } else {
1414     U = new unsigned[m + n + 1];
1415     V = new unsigned[n];
1416     Q = new unsigned[m+n];
1417     if (Remainder)
1418       R = new unsigned[n];
1419   }
1420 
1421   // Initialize the dividend
1422   memset(U, 0, (m+n+1)*sizeof(unsigned));
1423   for (unsigned i = 0; i < lhsWords; ++i) {
1424     uint64_t tmp = (LHS.getNumWords() == 1 ? LHS.U.VAL : LHS.U.pVal[i]);
1425     U[i * 2] = (unsigned)(tmp & mask);
1426     U[i * 2 + 1] = (unsigned)(tmp >> (sizeof(unsigned)*CHAR_BIT));
1427   }
1428   U[m+n] = 0; // this extra word is for "spill" in the Knuth algorithm.
1429 
1430   // Initialize the divisor
1431   memset(V, 0, (n)*sizeof(unsigned));
1432   for (unsigned i = 0; i < rhsWords; ++i) {
1433     uint64_t tmp = (RHS.getNumWords() == 1 ? RHS.U.VAL : RHS.U.pVal[i]);
1434     V[i * 2] = (unsigned)(tmp & mask);
1435     V[i * 2 + 1] = (unsigned)(tmp >> (sizeof(unsigned)*CHAR_BIT));
1436   }
1437 
1438   // initialize the quotient and remainder
1439   memset(Q, 0, (m+n) * sizeof(unsigned));
1440   if (Remainder)
1441     memset(R, 0, n * sizeof(unsigned));
1442 
1443   // Now, adjust m and n for the Knuth division. n is the number of words in
1444   // the divisor. m is the number of words by which the dividend exceeds the
1445   // divisor (i.e. m+n is the length of the dividend). These sizes must not
1446   // contain any zero words or the Knuth algorithm fails.
1447   for (unsigned i = n; i > 0 && V[i-1] == 0; i--) {
1448     n--;
1449     m++;
1450   }
1451   for (unsigned i = m+n; i > 0 && U[i-1] == 0; i--)
1452     m--;
1453 
1454   // If we're left with only a single word for the divisor, Knuth doesn't work
1455   // so we implement the short division algorithm here. This is much simpler
1456   // and faster because we are certain that we can divide a 64-bit quantity
1457   // by a 32-bit quantity at hardware speed and short division is simply a
1458   // series of such operations. This is just like doing short division but we
1459   // are using base 2^32 instead of base 10.
1460   assert(n != 0 && "Divide by zero?");
1461   if (n == 1) {
1462     unsigned divisor = V[0];
1463     unsigned remainder = 0;
1464     for (int i = m+n-1; i >= 0; i--) {
1465       uint64_t partial_dividend = uint64_t(remainder) << 32 | U[i];
1466       if (partial_dividend == 0) {
1467         Q[i] = 0;
1468         remainder = 0;
1469       } else if (partial_dividend < divisor) {
1470         Q[i] = 0;
1471         remainder = (unsigned)partial_dividend;
1472       } else if (partial_dividend == divisor) {
1473         Q[i] = 1;
1474         remainder = 0;
1475       } else {
1476         Q[i] = (unsigned)(partial_dividend / divisor);
1477         remainder = (unsigned)(partial_dividend - (Q[i] * divisor));
1478       }
1479     }
1480     if (R)
1481       R[0] = remainder;
1482   } else {
1483     // Now we're ready to invoke the Knuth classical divide algorithm. In this
1484     // case n > 1.
1485     KnuthDiv(U, V, Q, R, m, n);
1486   }
1487 
1488   // If the caller wants the quotient
1489   if (Quotient) {
1490     // Set up the Quotient value's memory.
1491     if (Quotient->BitWidth != LHS.BitWidth) {
1492       if (Quotient->isSingleWord())
1493         Quotient->U.VAL = 0;
1494       else
1495         delete [] Quotient->U.pVal;
1496       Quotient->BitWidth = LHS.BitWidth;
1497       if (!Quotient->isSingleWord())
1498         Quotient->U.pVal = getClearedMemory(Quotient->getNumWords());
1499     } else
1500       Quotient->clearAllBits();
1501 
1502     // The quotient is in Q. Reconstitute the quotient into Quotient's low
1503     // order words.
1504     // This case is currently dead as all users of divide() handle trivial cases
1505     // earlier.
1506     if (lhsWords == 1) {
1507       uint64_t tmp =
1508         uint64_t(Q[0]) | (uint64_t(Q[1]) << (APINT_BITS_PER_WORD / 2));
1509       if (Quotient->isSingleWord())
1510         Quotient->U.VAL = tmp;
1511       else
1512         Quotient->U.pVal[0] = tmp;
1513     } else {
1514       assert(!Quotient->isSingleWord() && "Quotient APInt not large enough");
1515       for (unsigned i = 0; i < lhsWords; ++i)
1516         Quotient->U.pVal[i] =
1517           uint64_t(Q[i*2]) | (uint64_t(Q[i*2+1]) << (APINT_BITS_PER_WORD / 2));
1518     }
1519   }
1520 
1521   // If the caller wants the remainder
1522   if (Remainder) {
1523     // Set up the Remainder value's memory.
1524     if (Remainder->BitWidth != RHS.BitWidth) {
1525       if (Remainder->isSingleWord())
1526         Remainder->U.VAL = 0;
1527       else
1528         delete [] Remainder->U.pVal;
1529       Remainder->BitWidth = RHS.BitWidth;
1530       if (!Remainder->isSingleWord())
1531         Remainder->U.pVal = getClearedMemory(Remainder->getNumWords());
1532     } else
1533       Remainder->clearAllBits();
1534 
1535     // The remainder is in R. Reconstitute the remainder into Remainder's low
1536     // order words.
1537     if (rhsWords == 1) {
1538       uint64_t tmp =
1539         uint64_t(R[0]) | (uint64_t(R[1]) << (APINT_BITS_PER_WORD / 2));
1540       if (Remainder->isSingleWord())
1541         Remainder->U.VAL = tmp;
1542       else
1543         Remainder->U.pVal[0] = tmp;
1544     } else {
1545       assert(!Remainder->isSingleWord() && "Remainder APInt not large enough");
1546       for (unsigned i = 0; i < rhsWords; ++i)
1547         Remainder->U.pVal[i] =
1548           uint64_t(R[i*2]) | (uint64_t(R[i*2+1]) << (APINT_BITS_PER_WORD / 2));
1549     }
1550   }
1551 
1552   // Clean up the memory we allocated.
1553   if (U != &SPACE[0]) {
1554     delete [] U;
1555     delete [] V;
1556     delete [] Q;
1557     delete [] R;
1558   }
1559 }
1560 
1561 APInt APInt::udiv(const APInt& RHS) const {
1562   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
1563 
1564   // First, deal with the easy case
1565   if (isSingleWord()) {
1566     assert(RHS.U.VAL != 0 && "Divide by zero?");
1567     return APInt(BitWidth, U.VAL / RHS.U.VAL);
1568   }
1569 
1570   // Get some facts about the LHS and RHS number of bits and words
1571   unsigned rhsBits = RHS.getActiveBits();
1572   unsigned rhsWords = !rhsBits ? 0 : (APInt::whichWord(rhsBits - 1) + 1);
1573   assert(rhsWords && "Divided by zero???");
1574   unsigned lhsBits = this->getActiveBits();
1575   unsigned lhsWords = !lhsBits ? 0 : (APInt::whichWord(lhsBits - 1) + 1);
1576 
1577   // Deal with some degenerate cases
1578   if (!lhsWords)
1579     // 0 / X ===> 0
1580     return APInt(BitWidth, 0);
1581   else if (lhsWords < rhsWords || this->ult(RHS)) {
1582     // X / Y ===> 0, iff X < Y
1583     return APInt(BitWidth, 0);
1584   } else if (*this == RHS) {
1585     // X / X ===> 1
1586     return APInt(BitWidth, 1);
1587   } else if (lhsWords == 1 && rhsWords == 1) {
1588     // All high words are zero, just use native divide
1589     return APInt(BitWidth, this->U.pVal[0] / RHS.U.pVal[0]);
1590   }
1591 
1592   // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1593   APInt Quotient(1,0); // to hold result.
1594   divide(*this, lhsWords, RHS, rhsWords, &Quotient, nullptr);
1595   return Quotient;
1596 }
1597 
1598 APInt APInt::sdiv(const APInt &RHS) const {
1599   if (isNegative()) {
1600     if (RHS.isNegative())
1601       return (-(*this)).udiv(-RHS);
1602     return -((-(*this)).udiv(RHS));
1603   }
1604   if (RHS.isNegative())
1605     return -(this->udiv(-RHS));
1606   return this->udiv(RHS);
1607 }
1608 
1609 APInt APInt::urem(const APInt& RHS) const {
1610   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
1611   if (isSingleWord()) {
1612     assert(RHS.U.VAL != 0 && "Remainder by zero?");
1613     return APInt(BitWidth, U.VAL % RHS.U.VAL);
1614   }
1615 
1616   // Get some facts about the LHS
1617   unsigned lhsBits = getActiveBits();
1618   unsigned lhsWords = !lhsBits ? 0 : (whichWord(lhsBits - 1) + 1);
1619 
1620   // Get some facts about the RHS
1621   unsigned rhsBits = RHS.getActiveBits();
1622   unsigned rhsWords = !rhsBits ? 0 : (APInt::whichWord(rhsBits - 1) + 1);
1623   assert(rhsWords && "Performing remainder operation by zero ???");
1624 
1625   // Check the degenerate cases
1626   if (lhsWords == 0) {
1627     // 0 % Y ===> 0
1628     return APInt(BitWidth, 0);
1629   } else if (lhsWords < rhsWords || this->ult(RHS)) {
1630     // X % Y ===> X, iff X < Y
1631     return *this;
1632   } else if (*this == RHS) {
1633     // X % X == 0;
1634     return APInt(BitWidth, 0);
1635   } else if (lhsWords == 1) {
1636     // All high words are zero, just use native remainder
1637     return APInt(BitWidth, U.pVal[0] % RHS.U.pVal[0]);
1638   }
1639 
1640   // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1641   APInt Remainder(1,0);
1642   divide(*this, lhsWords, RHS, rhsWords, nullptr, &Remainder);
1643   return Remainder;
1644 }
1645 
1646 APInt APInt::srem(const APInt &RHS) const {
1647   if (isNegative()) {
1648     if (RHS.isNegative())
1649       return -((-(*this)).urem(-RHS));
1650     return -((-(*this)).urem(RHS));
1651   }
1652   if (RHS.isNegative())
1653     return this->urem(-RHS);
1654   return this->urem(RHS);
1655 }
1656 
1657 void APInt::udivrem(const APInt &LHS, const APInt &RHS,
1658                     APInt &Quotient, APInt &Remainder) {
1659   assert(LHS.BitWidth == RHS.BitWidth && "Bit widths must be the same");
1660 
1661   // First, deal with the easy case
1662   if (LHS.isSingleWord()) {
1663     assert(RHS.U.VAL != 0 && "Divide by zero?");
1664     uint64_t QuotVal = LHS.U.VAL / RHS.U.VAL;
1665     uint64_t RemVal = LHS.U.VAL % RHS.U.VAL;
1666     Quotient = APInt(LHS.BitWidth, QuotVal);
1667     Remainder = APInt(LHS.BitWidth, RemVal);
1668     return;
1669   }
1670 
1671   // Get some size facts about the dividend and divisor
1672   unsigned lhsBits  = LHS.getActiveBits();
1673   unsigned lhsWords = !lhsBits ? 0 : (APInt::whichWord(lhsBits - 1) + 1);
1674   unsigned rhsBits  = RHS.getActiveBits();
1675   unsigned rhsWords = !rhsBits ? 0 : (APInt::whichWord(rhsBits - 1) + 1);
1676 
1677   // Check the degenerate cases
1678   if (lhsWords == 0) {
1679     Quotient = 0;                // 0 / Y ===> 0
1680     Remainder = 0;               // 0 % Y ===> 0
1681     return;
1682   }
1683 
1684   if (lhsWords < rhsWords || LHS.ult(RHS)) {
1685     Remainder = LHS;            // X % Y ===> X, iff X < Y
1686     Quotient = 0;               // X / Y ===> 0, iff X < Y
1687     return;
1688   }
1689 
1690   if (LHS == RHS) {
1691     Quotient  = 1;              // X / X ===> 1
1692     Remainder = 0;              // X % X ===> 0;
1693     return;
1694   }
1695 
1696   if (lhsWords == 1 && rhsWords == 1) {
1697     // There is only one word to consider so use the native versions.
1698     uint64_t lhsValue = LHS.isSingleWord() ? LHS.U.VAL : LHS.U.pVal[0];
1699     uint64_t rhsValue = RHS.isSingleWord() ? RHS.U.VAL : RHS.U.pVal[0];
1700     Quotient = APInt(LHS.getBitWidth(), lhsValue / rhsValue);
1701     Remainder = APInt(LHS.getBitWidth(), lhsValue % rhsValue);
1702     return;
1703   }
1704 
1705   // Okay, lets do it the long way
1706   divide(LHS, lhsWords, RHS, rhsWords, &Quotient, &Remainder);
1707 }
1708 
1709 void APInt::sdivrem(const APInt &LHS, const APInt &RHS,
1710                     APInt &Quotient, APInt &Remainder) {
1711   if (LHS.isNegative()) {
1712     if (RHS.isNegative())
1713       APInt::udivrem(-LHS, -RHS, Quotient, Remainder);
1714     else {
1715       APInt::udivrem(-LHS, RHS, Quotient, Remainder);
1716       Quotient = -Quotient;
1717     }
1718     Remainder = -Remainder;
1719   } else if (RHS.isNegative()) {
1720     APInt::udivrem(LHS, -RHS, Quotient, Remainder);
1721     Quotient = -Quotient;
1722   } else {
1723     APInt::udivrem(LHS, RHS, Quotient, Remainder);
1724   }
1725 }
1726 
1727 APInt APInt::sadd_ov(const APInt &RHS, bool &Overflow) const {
1728   APInt Res = *this+RHS;
1729   Overflow = isNonNegative() == RHS.isNonNegative() &&
1730              Res.isNonNegative() != isNonNegative();
1731   return Res;
1732 }
1733 
1734 APInt APInt::uadd_ov(const APInt &RHS, bool &Overflow) const {
1735   APInt Res = *this+RHS;
1736   Overflow = Res.ult(RHS);
1737   return Res;
1738 }
1739 
1740 APInt APInt::ssub_ov(const APInt &RHS, bool &Overflow) const {
1741   APInt Res = *this - RHS;
1742   Overflow = isNonNegative() != RHS.isNonNegative() &&
1743              Res.isNonNegative() != isNonNegative();
1744   return Res;
1745 }
1746 
1747 APInt APInt::usub_ov(const APInt &RHS, bool &Overflow) const {
1748   APInt Res = *this-RHS;
1749   Overflow = Res.ugt(*this);
1750   return Res;
1751 }
1752 
1753 APInt APInt::sdiv_ov(const APInt &RHS, bool &Overflow) const {
1754   // MININT/-1  -->  overflow.
1755   Overflow = isMinSignedValue() && RHS.isAllOnesValue();
1756   return sdiv(RHS);
1757 }
1758 
1759 APInt APInt::smul_ov(const APInt &RHS, bool &Overflow) const {
1760   APInt Res = *this * RHS;
1761 
1762   if (*this != 0 && RHS != 0)
1763     Overflow = Res.sdiv(RHS) != *this || Res.sdiv(*this) != RHS;
1764   else
1765     Overflow = false;
1766   return Res;
1767 }
1768 
1769 APInt APInt::umul_ov(const APInt &RHS, bool &Overflow) const {
1770   APInt Res = *this * RHS;
1771 
1772   if (*this != 0 && RHS != 0)
1773     Overflow = Res.udiv(RHS) != *this || Res.udiv(*this) != RHS;
1774   else
1775     Overflow = false;
1776   return Res;
1777 }
1778 
1779 APInt APInt::sshl_ov(const APInt &ShAmt, bool &Overflow) const {
1780   Overflow = ShAmt.uge(getBitWidth());
1781   if (Overflow)
1782     return APInt(BitWidth, 0);
1783 
1784   if (isNonNegative()) // Don't allow sign change.
1785     Overflow = ShAmt.uge(countLeadingZeros());
1786   else
1787     Overflow = ShAmt.uge(countLeadingOnes());
1788 
1789   return *this << ShAmt;
1790 }
1791 
1792 APInt APInt::ushl_ov(const APInt &ShAmt, bool &Overflow) const {
1793   Overflow = ShAmt.uge(getBitWidth());
1794   if (Overflow)
1795     return APInt(BitWidth, 0);
1796 
1797   Overflow = ShAmt.ugt(countLeadingZeros());
1798 
1799   return *this << ShAmt;
1800 }
1801 
1802 
1803 
1804 
1805 void APInt::fromString(unsigned numbits, StringRef str, uint8_t radix) {
1806   // Check our assumptions here
1807   assert(!str.empty() && "Invalid string length");
1808   assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 ||
1809           radix == 36) &&
1810          "Radix should be 2, 8, 10, 16, or 36!");
1811 
1812   StringRef::iterator p = str.begin();
1813   size_t slen = str.size();
1814   bool isNeg = *p == '-';
1815   if (*p == '-' || *p == '+') {
1816     p++;
1817     slen--;
1818     assert(slen && "String is only a sign, needs a value.");
1819   }
1820   assert((slen <= numbits || radix != 2) && "Insufficient bit width");
1821   assert(((slen-1)*3 <= numbits || radix != 8) && "Insufficient bit width");
1822   assert(((slen-1)*4 <= numbits || radix != 16) && "Insufficient bit width");
1823   assert((((slen-1)*64)/22 <= numbits || radix != 10) &&
1824          "Insufficient bit width");
1825 
1826   // Allocate memory if needed
1827   if (isSingleWord())
1828     U.VAL = 0;
1829   else
1830     U.pVal = getClearedMemory(getNumWords());
1831 
1832   // Figure out if we can shift instead of multiply
1833   unsigned shift = (radix == 16 ? 4 : radix == 8 ? 3 : radix == 2 ? 1 : 0);
1834 
1835   // Set up an APInt for the radix multiplier outside the loop so we don't
1836   // constantly construct/destruct it.
1837   APInt apradix(getBitWidth(), radix);
1838 
1839   // Enter digit traversal loop
1840   for (StringRef::iterator e = str.end(); p != e; ++p) {
1841     unsigned digit = getDigit(*p, radix);
1842     assert(digit < radix && "Invalid character in digit string");
1843 
1844     // Shift or multiply the value by the radix
1845     if (slen > 1) {
1846       if (shift)
1847         *this <<= shift;
1848       else
1849         *this *= apradix;
1850     }
1851 
1852     // Add in the digit we just interpreted
1853     *this += digit;
1854   }
1855   // If its negative, put it in two's complement form
1856   if (isNeg) {
1857     --(*this);
1858     this->flipAllBits();
1859   }
1860 }
1861 
1862 void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix,
1863                      bool Signed, bool formatAsCLiteral) const {
1864   assert((Radix == 10 || Radix == 8 || Radix == 16 || Radix == 2 ||
1865           Radix == 36) &&
1866          "Radix should be 2, 8, 10, 16, or 36!");
1867 
1868   const char *Prefix = "";
1869   if (formatAsCLiteral) {
1870     switch (Radix) {
1871       case 2:
1872         // Binary literals are a non-standard extension added in gcc 4.3:
1873         // http://gcc.gnu.org/onlinedocs/gcc-4.3.0/gcc/Binary-constants.html
1874         Prefix = "0b";
1875         break;
1876       case 8:
1877         Prefix = "0";
1878         break;
1879       case 10:
1880         break; // No prefix
1881       case 16:
1882         Prefix = "0x";
1883         break;
1884       default:
1885         llvm_unreachable("Invalid radix!");
1886     }
1887   }
1888 
1889   // First, check for a zero value and just short circuit the logic below.
1890   if (*this == 0) {
1891     while (*Prefix) {
1892       Str.push_back(*Prefix);
1893       ++Prefix;
1894     };
1895     Str.push_back('0');
1896     return;
1897   }
1898 
1899   static const char Digits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
1900 
1901   if (isSingleWord()) {
1902     char Buffer[65];
1903     char *BufPtr = Buffer+65;
1904 
1905     uint64_t N;
1906     if (!Signed) {
1907       N = getZExtValue();
1908     } else {
1909       int64_t I = getSExtValue();
1910       if (I >= 0) {
1911         N = I;
1912       } else {
1913         Str.push_back('-');
1914         N = -(uint64_t)I;
1915       }
1916     }
1917 
1918     while (*Prefix) {
1919       Str.push_back(*Prefix);
1920       ++Prefix;
1921     };
1922 
1923     while (N) {
1924       *--BufPtr = Digits[N % Radix];
1925       N /= Radix;
1926     }
1927     Str.append(BufPtr, Buffer+65);
1928     return;
1929   }
1930 
1931   APInt Tmp(*this);
1932 
1933   if (Signed && isNegative()) {
1934     // They want to print the signed version and it is a negative value
1935     // Flip the bits and add one to turn it into the equivalent positive
1936     // value and put a '-' in the result.
1937     Tmp.flipAllBits();
1938     ++Tmp;
1939     Str.push_back('-');
1940   }
1941 
1942   while (*Prefix) {
1943     Str.push_back(*Prefix);
1944     ++Prefix;
1945   };
1946 
1947   // We insert the digits backward, then reverse them to get the right order.
1948   unsigned StartDig = Str.size();
1949 
1950   // For the 2, 8 and 16 bit cases, we can just shift instead of divide
1951   // because the number of bits per digit (1, 3 and 4 respectively) divides
1952   // equally.  We just shift until the value is zero.
1953   if (Radix == 2 || Radix == 8 || Radix == 16) {
1954     // Just shift tmp right for each digit width until it becomes zero
1955     unsigned ShiftAmt = (Radix == 16 ? 4 : (Radix == 8 ? 3 : 1));
1956     unsigned MaskAmt = Radix - 1;
1957 
1958     while (Tmp != 0) {
1959       unsigned Digit = unsigned(Tmp.getRawData()[0]) & MaskAmt;
1960       Str.push_back(Digits[Digit]);
1961       Tmp.lshrInPlace(ShiftAmt);
1962     }
1963   } else {
1964     APInt divisor(Radix == 10? 4 : 8, Radix);
1965     while (Tmp != 0) {
1966       APInt APdigit(1, 0);
1967       APInt tmp2(Tmp.getBitWidth(), 0);
1968       divide(Tmp, Tmp.getNumWords(), divisor, divisor.getNumWords(), &tmp2,
1969              &APdigit);
1970       unsigned Digit = (unsigned)APdigit.getZExtValue();
1971       assert(Digit < Radix && "divide failed");
1972       Str.push_back(Digits[Digit]);
1973       Tmp = tmp2;
1974     }
1975   }
1976 
1977   // Reverse the digits before returning.
1978   std::reverse(Str.begin()+StartDig, Str.end());
1979 }
1980 
1981 /// Returns the APInt as a std::string. Note that this is an inefficient method.
1982 /// It is better to pass in a SmallVector/SmallString to the methods above.
1983 std::string APInt::toString(unsigned Radix = 10, bool Signed = true) const {
1984   SmallString<40> S;
1985   toString(S, Radix, Signed, /* formatAsCLiteral = */false);
1986   return S.str();
1987 }
1988 
1989 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1990 LLVM_DUMP_METHOD void APInt::dump() const {
1991   SmallString<40> S, U;
1992   this->toStringUnsigned(U);
1993   this->toStringSigned(S);
1994   dbgs() << "APInt(" << BitWidth << "b, "
1995          << U << "u " << S << "s)\n";
1996 }
1997 #endif
1998 
1999 void APInt::print(raw_ostream &OS, bool isSigned) const {
2000   SmallString<40> S;
2001   this->toString(S, 10, isSigned, /* formatAsCLiteral = */false);
2002   OS << S;
2003 }
2004 
2005 // This implements a variety of operations on a representation of
2006 // arbitrary precision, two's-complement, bignum integer values.
2007 
2008 // Assumed by lowHalf, highHalf, partMSB and partLSB.  A fairly safe
2009 // and unrestricting assumption.
2010 static_assert(APInt::APINT_BITS_PER_WORD % 2 == 0,
2011               "Part width must be divisible by 2!");
2012 
2013 /* Some handy functions local to this file.  */
2014 
2015 /* Returns the integer part with the least significant BITS set.
2016    BITS cannot be zero.  */
2017 static inline APInt::WordType lowBitMask(unsigned bits) {
2018   assert(bits != 0 && bits <= APInt::APINT_BITS_PER_WORD);
2019 
2020   return ~(APInt::WordType) 0 >> (APInt::APINT_BITS_PER_WORD - bits);
2021 }
2022 
2023 /* Returns the value of the lower half of PART.  */
2024 static inline APInt::WordType lowHalf(APInt::WordType part) {
2025   return part & lowBitMask(APInt::APINT_BITS_PER_WORD / 2);
2026 }
2027 
2028 /* Returns the value of the upper half of PART.  */
2029 static inline APInt::WordType highHalf(APInt::WordType part) {
2030   return part >> (APInt::APINT_BITS_PER_WORD / 2);
2031 }
2032 
2033 /* Returns the bit number of the most significant set bit of a part.
2034    If the input number has no bits set -1U is returned.  */
2035 static unsigned partMSB(APInt::WordType value) {
2036   return findLastSet(value, ZB_Max);
2037 }
2038 
2039 /* Returns the bit number of the least significant set bit of a
2040    part.  If the input number has no bits set -1U is returned.  */
2041 static unsigned partLSB(APInt::WordType value) {
2042   return findFirstSet(value, ZB_Max);
2043 }
2044 
2045 /* Sets the least significant part of a bignum to the input value, and
2046    zeroes out higher parts.  */
2047 void APInt::tcSet(WordType *dst, WordType part, unsigned parts) {
2048   assert(parts > 0);
2049 
2050   dst[0] = part;
2051   for (unsigned i = 1; i < parts; i++)
2052     dst[i] = 0;
2053 }
2054 
2055 /* Assign one bignum to another.  */
2056 void APInt::tcAssign(WordType *dst, const WordType *src, unsigned parts) {
2057   for (unsigned i = 0; i < parts; i++)
2058     dst[i] = src[i];
2059 }
2060 
2061 /* Returns true if a bignum is zero, false otherwise.  */
2062 bool APInt::tcIsZero(const WordType *src, unsigned parts) {
2063   for (unsigned i = 0; i < parts; i++)
2064     if (src[i])
2065       return false;
2066 
2067   return true;
2068 }
2069 
2070 /* Extract the given bit of a bignum; returns 0 or 1.  */
2071 int APInt::tcExtractBit(const WordType *parts, unsigned bit) {
2072   return (parts[whichWord(bit)] & maskBit(bit)) != 0;
2073 }
2074 
2075 /* Set the given bit of a bignum. */
2076 void APInt::tcSetBit(WordType *parts, unsigned bit) {
2077   parts[whichWord(bit)] |= maskBit(bit);
2078 }
2079 
2080 /* Clears the given bit of a bignum. */
2081 void APInt::tcClearBit(WordType *parts, unsigned bit) {
2082   parts[whichWord(bit)] &= ~maskBit(bit);
2083 }
2084 
2085 /* Returns the bit number of the least significant set bit of a
2086    number.  If the input number has no bits set -1U is returned.  */
2087 unsigned APInt::tcLSB(const WordType *parts, unsigned n) {
2088   for (unsigned i = 0; i < n; i++) {
2089     if (parts[i] != 0) {
2090       unsigned lsb = partLSB(parts[i]);
2091 
2092       return lsb + i * APINT_BITS_PER_WORD;
2093     }
2094   }
2095 
2096   return -1U;
2097 }
2098 
2099 /* Returns the bit number of the most significant set bit of a number.
2100    If the input number has no bits set -1U is returned.  */
2101 unsigned APInt::tcMSB(const WordType *parts, unsigned n) {
2102   do {
2103     --n;
2104 
2105     if (parts[n] != 0) {
2106       unsigned msb = partMSB(parts[n]);
2107 
2108       return msb + n * APINT_BITS_PER_WORD;
2109     }
2110   } while (n);
2111 
2112   return -1U;
2113 }
2114 
2115 /* Copy the bit vector of width srcBITS from SRC, starting at bit
2116    srcLSB, to DST, of dstCOUNT parts, such that the bit srcLSB becomes
2117    the least significant bit of DST.  All high bits above srcBITS in
2118    DST are zero-filled.  */
2119 void
2120 APInt::tcExtract(WordType *dst, unsigned dstCount, const WordType *src,
2121                  unsigned srcBits, unsigned srcLSB) {
2122   unsigned dstParts = (srcBits + APINT_BITS_PER_WORD - 1) / APINT_BITS_PER_WORD;
2123   assert(dstParts <= dstCount);
2124 
2125   unsigned firstSrcPart = srcLSB / APINT_BITS_PER_WORD;
2126   tcAssign (dst, src + firstSrcPart, dstParts);
2127 
2128   unsigned shift = srcLSB % APINT_BITS_PER_WORD;
2129   tcShiftRight (dst, dstParts, shift);
2130 
2131   /* We now have (dstParts * APINT_BITS_PER_WORD - shift) bits from SRC
2132      in DST.  If this is less that srcBits, append the rest, else
2133      clear the high bits.  */
2134   unsigned n = dstParts * APINT_BITS_PER_WORD - shift;
2135   if (n < srcBits) {
2136     WordType mask = lowBitMask (srcBits - n);
2137     dst[dstParts - 1] |= ((src[firstSrcPart + dstParts] & mask)
2138                           << n % APINT_BITS_PER_WORD);
2139   } else if (n > srcBits) {
2140     if (srcBits % APINT_BITS_PER_WORD)
2141       dst[dstParts - 1] &= lowBitMask (srcBits % APINT_BITS_PER_WORD);
2142   }
2143 
2144   /* Clear high parts.  */
2145   while (dstParts < dstCount)
2146     dst[dstParts++] = 0;
2147 }
2148 
2149 /* DST += RHS + C where C is zero or one.  Returns the carry flag.  */
2150 APInt::WordType APInt::tcAdd(WordType *dst, const WordType *rhs,
2151                              WordType c, unsigned parts) {
2152   assert(c <= 1);
2153 
2154   for (unsigned i = 0; i < parts; i++) {
2155     WordType l = dst[i];
2156     if (c) {
2157       dst[i] += rhs[i] + 1;
2158       c = (dst[i] <= l);
2159     } else {
2160       dst[i] += rhs[i];
2161       c = (dst[i] < l);
2162     }
2163   }
2164 
2165   return c;
2166 }
2167 
2168 /// This function adds a single "word" integer, src, to the multiple
2169 /// "word" integer array, dst[]. dst[] is modified to reflect the addition and
2170 /// 1 is returned if there is a carry out, otherwise 0 is returned.
2171 /// @returns the carry of the addition.
2172 APInt::WordType APInt::tcAddPart(WordType *dst, WordType src,
2173                                  unsigned parts) {
2174   for (unsigned i = 0; i < parts; ++i) {
2175     dst[i] += src;
2176     if (dst[i] >= src)
2177       return 0; // No need to carry so exit early.
2178     src = 1; // Carry one to next digit.
2179   }
2180 
2181   return 1;
2182 }
2183 
2184 /* DST -= RHS + C where C is zero or one.  Returns the carry flag.  */
2185 APInt::WordType APInt::tcSubtract(WordType *dst, const WordType *rhs,
2186                                   WordType c, unsigned parts) {
2187   assert(c <= 1);
2188 
2189   for (unsigned i = 0; i < parts; i++) {
2190     WordType l = dst[i];
2191     if (c) {
2192       dst[i] -= rhs[i] + 1;
2193       c = (dst[i] >= l);
2194     } else {
2195       dst[i] -= rhs[i];
2196       c = (dst[i] > l);
2197     }
2198   }
2199 
2200   return c;
2201 }
2202 
2203 /// This function subtracts a single "word" (64-bit word), src, from
2204 /// the multi-word integer array, dst[], propagating the borrowed 1 value until
2205 /// no further borrowing is needed or it runs out of "words" in dst.  The result
2206 /// is 1 if "borrowing" exhausted the digits in dst, or 0 if dst was not
2207 /// exhausted. In other words, if src > dst then this function returns 1,
2208 /// otherwise 0.
2209 /// @returns the borrow out of the subtraction
2210 APInt::WordType APInt::tcSubtractPart(WordType *dst, WordType src,
2211                                       unsigned parts) {
2212   for (unsigned i = 0; i < parts; ++i) {
2213     WordType Dst = dst[i];
2214     dst[i] -= src;
2215     if (src <= Dst)
2216       return 0; // No need to borrow so exit early.
2217     src = 1; // We have to "borrow 1" from next "word"
2218   }
2219 
2220   return 1;
2221 }
2222 
2223 /* Negate a bignum in-place.  */
2224 void APInt::tcNegate(WordType *dst, unsigned parts) {
2225   tcComplement(dst, parts);
2226   tcIncrement(dst, parts);
2227 }
2228 
2229 /*  DST += SRC * MULTIPLIER + CARRY   if add is true
2230     DST  = SRC * MULTIPLIER + CARRY   if add is false
2231 
2232     Requires 0 <= DSTPARTS <= SRCPARTS + 1.  If DST overlaps SRC
2233     they must start at the same point, i.e. DST == SRC.
2234 
2235     If DSTPARTS == SRCPARTS + 1 no overflow occurs and zero is
2236     returned.  Otherwise DST is filled with the least significant
2237     DSTPARTS parts of the result, and if all of the omitted higher
2238     parts were zero return zero, otherwise overflow occurred and
2239     return one.  */
2240 int APInt::tcMultiplyPart(WordType *dst, const WordType *src,
2241                           WordType multiplier, WordType carry,
2242                           unsigned srcParts, unsigned dstParts,
2243                           bool add) {
2244   /* Otherwise our writes of DST kill our later reads of SRC.  */
2245   assert(dst <= src || dst >= src + srcParts);
2246   assert(dstParts <= srcParts + 1);
2247 
2248   /* N loops; minimum of dstParts and srcParts.  */
2249   unsigned n = dstParts < srcParts ? dstParts: srcParts;
2250 
2251   unsigned i;
2252   for (i = 0; i < n; i++) {
2253     WordType low, mid, high, srcPart;
2254 
2255       /* [ LOW, HIGH ] = MULTIPLIER * SRC[i] + DST[i] + CARRY.
2256 
2257          This cannot overflow, because
2258 
2259          (n - 1) * (n - 1) + 2 (n - 1) = (n - 1) * (n + 1)
2260 
2261          which is less than n^2.  */
2262 
2263     srcPart = src[i];
2264 
2265     if (multiplier == 0 || srcPart == 0) {
2266       low = carry;
2267       high = 0;
2268     } else {
2269       low = lowHalf(srcPart) * lowHalf(multiplier);
2270       high = highHalf(srcPart) * highHalf(multiplier);
2271 
2272       mid = lowHalf(srcPart) * highHalf(multiplier);
2273       high += highHalf(mid);
2274       mid <<= APINT_BITS_PER_WORD / 2;
2275       if (low + mid < low)
2276         high++;
2277       low += mid;
2278 
2279       mid = highHalf(srcPart) * lowHalf(multiplier);
2280       high += highHalf(mid);
2281       mid <<= APINT_BITS_PER_WORD / 2;
2282       if (low + mid < low)
2283         high++;
2284       low += mid;
2285 
2286       /* Now add carry.  */
2287       if (low + carry < low)
2288         high++;
2289       low += carry;
2290     }
2291 
2292     if (add) {
2293       /* And now DST[i], and store the new low part there.  */
2294       if (low + dst[i] < low)
2295         high++;
2296       dst[i] += low;
2297     } else
2298       dst[i] = low;
2299 
2300     carry = high;
2301   }
2302 
2303   if (i < dstParts) {
2304     /* Full multiplication, there is no overflow.  */
2305     assert(i + 1 == dstParts);
2306     dst[i] = carry;
2307     return 0;
2308   } else {
2309     /* We overflowed if there is carry.  */
2310     if (carry)
2311       return 1;
2312 
2313     /* We would overflow if any significant unwritten parts would be
2314        non-zero.  This is true if any remaining src parts are non-zero
2315        and the multiplier is non-zero.  */
2316     if (multiplier)
2317       for (; i < srcParts; i++)
2318         if (src[i])
2319           return 1;
2320 
2321     /* We fitted in the narrow destination.  */
2322     return 0;
2323   }
2324 }
2325 
2326 /* DST = LHS * RHS, where DST has the same width as the operands and
2327    is filled with the least significant parts of the result.  Returns
2328    one if overflow occurred, otherwise zero.  DST must be disjoint
2329    from both operands.  */
2330 int APInt::tcMultiply(WordType *dst, const WordType *lhs,
2331                       const WordType *rhs, unsigned parts) {
2332   assert(dst != lhs && dst != rhs);
2333 
2334   int overflow = 0;
2335   tcSet(dst, 0, parts);
2336 
2337   for (unsigned i = 0; i < parts; i++)
2338     overflow |= tcMultiplyPart(&dst[i], lhs, rhs[i], 0, parts,
2339                                parts - i, true);
2340 
2341   return overflow;
2342 }
2343 
2344 /* DST = LHS * RHS, where DST has width the sum of the widths of the
2345    operands.  No overflow occurs.  DST must be disjoint from both
2346    operands.  Returns the number of parts required to hold the
2347    result.  */
2348 unsigned APInt::tcFullMultiply(WordType *dst, const WordType *lhs,
2349                                const WordType *rhs, unsigned lhsParts,
2350                                unsigned rhsParts) {
2351   /* Put the narrower number on the LHS for less loops below.  */
2352   if (lhsParts > rhsParts) {
2353     return tcFullMultiply (dst, rhs, lhs, rhsParts, lhsParts);
2354   } else {
2355     assert(dst != lhs && dst != rhs);
2356 
2357     tcSet(dst, 0, rhsParts);
2358 
2359     for (unsigned i = 0; i < lhsParts; i++)
2360       tcMultiplyPart(&dst[i], rhs, lhs[i], 0, rhsParts, rhsParts + 1, true);
2361 
2362     unsigned n = lhsParts + rhsParts;
2363 
2364     return n - (dst[n - 1] == 0);
2365   }
2366 }
2367 
2368 /* If RHS is zero LHS and REMAINDER are left unchanged, return one.
2369    Otherwise set LHS to LHS / RHS with the fractional part discarded,
2370    set REMAINDER to the remainder, return zero.  i.e.
2371 
2372    OLD_LHS = RHS * LHS + REMAINDER
2373 
2374    SCRATCH is a bignum of the same size as the operands and result for
2375    use by the routine; its contents need not be initialized and are
2376    destroyed.  LHS, REMAINDER and SCRATCH must be distinct.
2377 */
2378 int APInt::tcDivide(WordType *lhs, const WordType *rhs,
2379                     WordType *remainder, WordType *srhs,
2380                     unsigned parts) {
2381   assert(lhs != remainder && lhs != srhs && remainder != srhs);
2382 
2383   unsigned shiftCount = tcMSB(rhs, parts) + 1;
2384   if (shiftCount == 0)
2385     return true;
2386 
2387   shiftCount = parts * APINT_BITS_PER_WORD - shiftCount;
2388   unsigned n = shiftCount / APINT_BITS_PER_WORD;
2389   WordType mask = (WordType) 1 << (shiftCount % APINT_BITS_PER_WORD);
2390 
2391   tcAssign(srhs, rhs, parts);
2392   tcShiftLeft(srhs, parts, shiftCount);
2393   tcAssign(remainder, lhs, parts);
2394   tcSet(lhs, 0, parts);
2395 
2396   /* Loop, subtracting SRHS if REMAINDER is greater and adding that to
2397      the total.  */
2398   for (;;) {
2399       int compare;
2400 
2401       compare = tcCompare(remainder, srhs, parts);
2402       if (compare >= 0) {
2403         tcSubtract(remainder, srhs, 0, parts);
2404         lhs[n] |= mask;
2405       }
2406 
2407       if (shiftCount == 0)
2408         break;
2409       shiftCount--;
2410       tcShiftRight(srhs, parts, 1);
2411       if ((mask >>= 1) == 0) {
2412         mask = (WordType) 1 << (APINT_BITS_PER_WORD - 1);
2413         n--;
2414       }
2415   }
2416 
2417   return false;
2418 }
2419 
2420 /// Shift a bignum left Cound bits in-place. Shifted in bits are zero. There are
2421 /// no restrictions on Count.
2422 void APInt::tcShiftLeft(WordType *Dst, unsigned Words, unsigned Count) {
2423   // Don't bother performing a no-op shift.
2424   if (!Count)
2425     return;
2426 
2427   // WordShift is the inter-part shift; BitShift is the intra-part shift.
2428   unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words);
2429   unsigned BitShift = Count % APINT_BITS_PER_WORD;
2430 
2431   // Fastpath for moving by whole words.
2432   if (BitShift == 0) {
2433     std::memmove(Dst + WordShift, Dst, (Words - WordShift) * APINT_WORD_SIZE);
2434   } else {
2435     while (Words-- > WordShift) {
2436       Dst[Words] = Dst[Words - WordShift] << BitShift;
2437       if (Words > WordShift)
2438         Dst[Words] |=
2439           Dst[Words - WordShift - 1] >> (APINT_BITS_PER_WORD - BitShift);
2440     }
2441   }
2442 
2443   // Fill in the remainder with 0s.
2444   std::memset(Dst, 0, WordShift * APINT_WORD_SIZE);
2445 }
2446 
2447 /// Shift a bignum right Count bits in-place. Shifted in bits are zero. There
2448 /// are no restrictions on Count.
2449 void APInt::tcShiftRight(WordType *Dst, unsigned Words, unsigned Count) {
2450   // Don't bother performing a no-op shift.
2451   if (!Count)
2452     return;
2453 
2454   // WordShift is the inter-part shift; BitShift is the intra-part shift.
2455   unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words);
2456   unsigned BitShift = Count % APINT_BITS_PER_WORD;
2457 
2458   unsigned WordsToMove = Words - WordShift;
2459   // Fastpath for moving by whole words.
2460   if (BitShift == 0) {
2461     std::memmove(Dst, Dst + WordShift, WordsToMove * APINT_WORD_SIZE);
2462   } else {
2463     for (unsigned i = 0; i != WordsToMove; ++i) {
2464       Dst[i] = Dst[i + WordShift] >> BitShift;
2465       if (i + 1 != WordsToMove)
2466         Dst[i] |= Dst[i + WordShift + 1] << (APINT_BITS_PER_WORD - BitShift);
2467     }
2468   }
2469 
2470   // Fill in the remainder with 0s.
2471   std::memset(Dst + WordsToMove, 0, WordShift * APINT_WORD_SIZE);
2472 }
2473 
2474 /* Bitwise and of two bignums.  */
2475 void APInt::tcAnd(WordType *dst, const WordType *rhs, unsigned parts) {
2476   for (unsigned i = 0; i < parts; i++)
2477     dst[i] &= rhs[i];
2478 }
2479 
2480 /* Bitwise inclusive or of two bignums.  */
2481 void APInt::tcOr(WordType *dst, const WordType *rhs, unsigned parts) {
2482   for (unsigned i = 0; i < parts; i++)
2483     dst[i] |= rhs[i];
2484 }
2485 
2486 /* Bitwise exclusive or of two bignums.  */
2487 void APInt::tcXor(WordType *dst, const WordType *rhs, unsigned parts) {
2488   for (unsigned i = 0; i < parts; i++)
2489     dst[i] ^= rhs[i];
2490 }
2491 
2492 /* Complement a bignum in-place.  */
2493 void APInt::tcComplement(WordType *dst, unsigned parts) {
2494   for (unsigned i = 0; i < parts; i++)
2495     dst[i] = ~dst[i];
2496 }
2497 
2498 /* Comparison (unsigned) of two bignums.  */
2499 int APInt::tcCompare(const WordType *lhs, const WordType *rhs,
2500                      unsigned parts) {
2501   while (parts) {
2502     parts--;
2503     if (lhs[parts] != rhs[parts])
2504       return (lhs[parts] > rhs[parts]) ? 1 : -1;
2505   }
2506 
2507   return 0;
2508 }
2509 
2510 /* Set the least significant BITS bits of a bignum, clear the
2511    rest.  */
2512 void APInt::tcSetLeastSignificantBits(WordType *dst, unsigned parts,
2513                                       unsigned bits) {
2514   unsigned i = 0;
2515   while (bits > APINT_BITS_PER_WORD) {
2516     dst[i++] = ~(WordType) 0;
2517     bits -= APINT_BITS_PER_WORD;
2518   }
2519 
2520   if (bits)
2521     dst[i++] = ~(WordType) 0 >> (APINT_BITS_PER_WORD - bits);
2522 
2523   while (i < parts)
2524     dst[i++] = 0;
2525 }
2526