1 //===-- APInt.cpp - Implement APInt class ---------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a class to represent arbitrary precision integer
10 // constant values and provide a variety of arithmetic operations on them.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/ADT/APInt.h"
15 #include "llvm/ADT/ArrayRef.h"
16 #include "llvm/ADT/FoldingSet.h"
17 #include "llvm/ADT/Hashing.h"
18 #include "llvm/ADT/Optional.h"
19 #include "llvm/ADT/SmallString.h"
20 #include "llvm/ADT/StringRef.h"
21 #include "llvm/ADT/bit.h"
22 #include "llvm/Config/llvm-config.h"
23 #include "llvm/Support/Debug.h"
24 #include "llvm/Support/ErrorHandling.h"
25 #include "llvm/Support/MathExtras.h"
26 #include "llvm/Support/raw_ostream.h"
27 #include <climits>
28 #include <cmath>
29 #include <cstdlib>
30 #include <cstring>
31 using namespace llvm;
32 
33 #define DEBUG_TYPE "apint"
34 
35 /// A utility function for allocating memory, checking for allocation failures,
36 /// and ensuring the contents are zeroed.
37 inline static uint64_t* getClearedMemory(unsigned numWords) {
38   uint64_t *result = new uint64_t[numWords];
39   memset(result, 0, numWords * sizeof(uint64_t));
40   return result;
41 }
42 
43 /// A utility function for allocating memory and checking for allocation
44 /// failure.  The content is not zeroed.
45 inline static uint64_t* getMemory(unsigned numWords) {
46   return new uint64_t[numWords];
47 }
48 
49 /// A utility function that converts a character to a digit.
50 inline static unsigned getDigit(char cdigit, uint8_t radix) {
51   unsigned r;
52 
53   if (radix == 16 || radix == 36) {
54     r = cdigit - '0';
55     if (r <= 9)
56       return r;
57 
58     r = cdigit - 'A';
59     if (r <= radix - 11U)
60       return r + 10;
61 
62     r = cdigit - 'a';
63     if (r <= radix - 11U)
64       return r + 10;
65 
66     radix = 10;
67   }
68 
69   r = cdigit - '0';
70   if (r < radix)
71     return r;
72 
73   return -1U;
74 }
75 
76 
77 void APInt::initSlowCase(uint64_t val, bool isSigned) {
78   U.pVal = getClearedMemory(getNumWords());
79   U.pVal[0] = val;
80   if (isSigned && int64_t(val) < 0)
81     for (unsigned i = 1; i < getNumWords(); ++i)
82       U.pVal[i] = WORDTYPE_MAX;
83   clearUnusedBits();
84 }
85 
86 void APInt::initSlowCase(const APInt& that) {
87   U.pVal = getMemory(getNumWords());
88   memcpy(U.pVal, that.U.pVal, getNumWords() * APINT_WORD_SIZE);
89 }
90 
91 void APInt::initFromArray(ArrayRef<uint64_t> bigVal) {
92   assert(bigVal.data() && "Null pointer detected!");
93   if (isSingleWord())
94     U.VAL = bigVal[0];
95   else {
96     // Get memory, cleared to 0
97     U.pVal = getClearedMemory(getNumWords());
98     // Calculate the number of words to copy
99     unsigned words = std::min<unsigned>(bigVal.size(), getNumWords());
100     // Copy the words from bigVal to pVal
101     memcpy(U.pVal, bigVal.data(), words * APINT_WORD_SIZE);
102   }
103   // Make sure unused high bits are cleared
104   clearUnusedBits();
105 }
106 
107 APInt::APInt(unsigned numBits, ArrayRef<uint64_t> bigVal) : BitWidth(numBits) {
108   initFromArray(bigVal);
109 }
110 
111 APInt::APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[])
112     : BitWidth(numBits) {
113   initFromArray(makeArrayRef(bigVal, numWords));
114 }
115 
116 APInt::APInt(unsigned numbits, StringRef Str, uint8_t radix)
117     : BitWidth(numbits) {
118   fromString(numbits, Str, radix);
119 }
120 
121 void APInt::reallocate(unsigned NewBitWidth) {
122   // If the number of words is the same we can just change the width and stop.
123   if (getNumWords() == getNumWords(NewBitWidth)) {
124     BitWidth = NewBitWidth;
125     return;
126   }
127 
128   // If we have an allocation, delete it.
129   if (!isSingleWord())
130     delete [] U.pVal;
131 
132   // Update BitWidth.
133   BitWidth = NewBitWidth;
134 
135   // If we are supposed to have an allocation, create it.
136   if (!isSingleWord())
137     U.pVal = getMemory(getNumWords());
138 }
139 
140 void APInt::AssignSlowCase(const APInt& RHS) {
141   // Don't do anything for X = X
142   if (this == &RHS)
143     return;
144 
145   // Adjust the bit width and handle allocations as necessary.
146   reallocate(RHS.getBitWidth());
147 
148   // Copy the data.
149   if (isSingleWord())
150     U.VAL = RHS.U.VAL;
151   else
152     memcpy(U.pVal, RHS.U.pVal, getNumWords() * APINT_WORD_SIZE);
153 }
154 
155 /// This method 'profiles' an APInt for use with FoldingSet.
156 void APInt::Profile(FoldingSetNodeID& ID) const {
157   ID.AddInteger(BitWidth);
158 
159   if (isSingleWord()) {
160     ID.AddInteger(U.VAL);
161     return;
162   }
163 
164   unsigned NumWords = getNumWords();
165   for (unsigned i = 0; i < NumWords; ++i)
166     ID.AddInteger(U.pVal[i]);
167 }
168 
169 /// Prefix increment operator. Increments the APInt by one.
170 APInt& APInt::operator++() {
171   if (isSingleWord())
172     ++U.VAL;
173   else
174     tcIncrement(U.pVal, getNumWords());
175   return clearUnusedBits();
176 }
177 
178 /// Prefix decrement operator. Decrements the APInt by one.
179 APInt& APInt::operator--() {
180   if (isSingleWord())
181     --U.VAL;
182   else
183     tcDecrement(U.pVal, getNumWords());
184   return clearUnusedBits();
185 }
186 
187 /// Adds the RHS APInt to this APInt.
188 /// @returns this, after addition of RHS.
189 /// Addition assignment operator.
190 APInt& APInt::operator+=(const APInt& RHS) {
191   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
192   if (isSingleWord())
193     U.VAL += RHS.U.VAL;
194   else
195     tcAdd(U.pVal, RHS.U.pVal, 0, getNumWords());
196   return clearUnusedBits();
197 }
198 
199 APInt& APInt::operator+=(uint64_t RHS) {
200   if (isSingleWord())
201     U.VAL += RHS;
202   else
203     tcAddPart(U.pVal, RHS, getNumWords());
204   return clearUnusedBits();
205 }
206 
207 /// Subtracts the RHS APInt from this APInt
208 /// @returns this, after subtraction
209 /// Subtraction assignment operator.
210 APInt& APInt::operator-=(const APInt& RHS) {
211   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
212   if (isSingleWord())
213     U.VAL -= RHS.U.VAL;
214   else
215     tcSubtract(U.pVal, RHS.U.pVal, 0, getNumWords());
216   return clearUnusedBits();
217 }
218 
219 APInt& APInt::operator-=(uint64_t RHS) {
220   if (isSingleWord())
221     U.VAL -= RHS;
222   else
223     tcSubtractPart(U.pVal, RHS, getNumWords());
224   return clearUnusedBits();
225 }
226 
227 APInt APInt::operator*(const APInt& RHS) const {
228   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
229   if (isSingleWord())
230     return APInt(BitWidth, U.VAL * RHS.U.VAL);
231 
232   APInt Result(getMemory(getNumWords()), getBitWidth());
233   tcMultiply(Result.U.pVal, U.pVal, RHS.U.pVal, getNumWords());
234   Result.clearUnusedBits();
235   return Result;
236 }
237 
238 void APInt::AndAssignSlowCase(const APInt &RHS) {
239   WordType *dst = U.pVal, *rhs = RHS.U.pVal;
240   for (size_t i = 0, e = getNumWords(); i != e; ++i)
241     dst[i] &= rhs[i];
242 }
243 
244 void APInt::OrAssignSlowCase(const APInt &RHS) {
245   WordType *dst = U.pVal, *rhs = RHS.U.pVal;
246   for (size_t i = 0, e = getNumWords(); i != e; ++i)
247     dst[i] |= rhs[i];
248 }
249 
250 void APInt::XorAssignSlowCase(const APInt &RHS) {
251   WordType *dst = U.pVal, *rhs = RHS.U.pVal;
252   for (size_t i = 0, e = getNumWords(); i != e; ++i)
253     dst[i] ^= rhs[i];
254 }
255 
256 APInt &APInt::operator*=(const APInt &RHS) {
257   *this = *this * RHS;
258   return *this;
259 }
260 
261 APInt& APInt::operator*=(uint64_t RHS) {
262   if (isSingleWord()) {
263     U.VAL *= RHS;
264   } else {
265     unsigned NumWords = getNumWords();
266     tcMultiplyPart(U.pVal, U.pVal, RHS, 0, NumWords, NumWords, false);
267   }
268   return clearUnusedBits();
269 }
270 
271 bool APInt::EqualSlowCase(const APInt& RHS) const {
272   return std::equal(U.pVal, U.pVal + getNumWords(), RHS.U.pVal);
273 }
274 
275 int APInt::compare(const APInt& RHS) const {
276   assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
277   if (isSingleWord())
278     return U.VAL < RHS.U.VAL ? -1 : U.VAL > RHS.U.VAL;
279 
280   return tcCompare(U.pVal, RHS.U.pVal, getNumWords());
281 }
282 
283 int APInt::compareSigned(const APInt& RHS) const {
284   assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
285   if (isSingleWord()) {
286     int64_t lhsSext = SignExtend64(U.VAL, BitWidth);
287     int64_t rhsSext = SignExtend64(RHS.U.VAL, BitWidth);
288     return lhsSext < rhsSext ? -1 : lhsSext > rhsSext;
289   }
290 
291   bool lhsNeg = isNegative();
292   bool rhsNeg = RHS.isNegative();
293 
294   // If the sign bits don't match, then (LHS < RHS) if LHS is negative
295   if (lhsNeg != rhsNeg)
296     return lhsNeg ? -1 : 1;
297 
298   // Otherwise we can just use an unsigned comparison, because even negative
299   // numbers compare correctly this way if both have the same signed-ness.
300   return tcCompare(U.pVal, RHS.U.pVal, getNumWords());
301 }
302 
303 void APInt::setBitsSlowCase(unsigned loBit, unsigned hiBit) {
304   unsigned loWord = whichWord(loBit);
305   unsigned hiWord = whichWord(hiBit);
306 
307   // Create an initial mask for the low word with zeros below loBit.
308   uint64_t loMask = WORDTYPE_MAX << whichBit(loBit);
309 
310   // If hiBit is not aligned, we need a high mask.
311   unsigned hiShiftAmt = whichBit(hiBit);
312   if (hiShiftAmt != 0) {
313     // Create a high mask with zeros above hiBit.
314     uint64_t hiMask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - hiShiftAmt);
315     // If loWord and hiWord are equal, then we combine the masks. Otherwise,
316     // set the bits in hiWord.
317     if (hiWord == loWord)
318       loMask &= hiMask;
319     else
320       U.pVal[hiWord] |= hiMask;
321   }
322   // Apply the mask to the low word.
323   U.pVal[loWord] |= loMask;
324 
325   // Fill any words between loWord and hiWord with all ones.
326   for (unsigned word = loWord + 1; word < hiWord; ++word)
327     U.pVal[word] = WORDTYPE_MAX;
328 }
329 
330 // Complement a bignum in-place.
331 static void tcComplement(APInt::WordType *dst, unsigned parts) {
332   for (unsigned i = 0; i < parts; i++)
333     dst[i] = ~dst[i];
334 }
335 
336 /// Toggle every bit to its opposite value.
337 void APInt::flipAllBitsSlowCase() {
338   tcComplement(U.pVal, getNumWords());
339   clearUnusedBits();
340 }
341 
342 /// Toggle a given bit to its opposite value whose position is given
343 /// as "bitPosition".
344 /// Toggles a given bit to its opposite value.
345 void APInt::flipBit(unsigned bitPosition) {
346   assert(bitPosition < BitWidth && "Out of the bit-width range!");
347   setBitVal(bitPosition, !(*this)[bitPosition]);
348 }
349 
350 void APInt::insertBits(const APInt &subBits, unsigned bitPosition) {
351   unsigned subBitWidth = subBits.getBitWidth();
352   assert(0 < subBitWidth && (subBitWidth + bitPosition) <= BitWidth &&
353          "Illegal bit insertion");
354 
355   // Insertion is a direct copy.
356   if (subBitWidth == BitWidth) {
357     *this = subBits;
358     return;
359   }
360 
361   // Single word result can be done as a direct bitmask.
362   if (isSingleWord()) {
363     uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - subBitWidth);
364     U.VAL &= ~(mask << bitPosition);
365     U.VAL |= (subBits.U.VAL << bitPosition);
366     return;
367   }
368 
369   unsigned loBit = whichBit(bitPosition);
370   unsigned loWord = whichWord(bitPosition);
371   unsigned hi1Word = whichWord(bitPosition + subBitWidth - 1);
372 
373   // Insertion within a single word can be done as a direct bitmask.
374   if (loWord == hi1Word) {
375     uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - subBitWidth);
376     U.pVal[loWord] &= ~(mask << loBit);
377     U.pVal[loWord] |= (subBits.U.VAL << loBit);
378     return;
379   }
380 
381   // Insert on word boundaries.
382   if (loBit == 0) {
383     // Direct copy whole words.
384     unsigned numWholeSubWords = subBitWidth / APINT_BITS_PER_WORD;
385     memcpy(U.pVal + loWord, subBits.getRawData(),
386            numWholeSubWords * APINT_WORD_SIZE);
387 
388     // Mask+insert remaining bits.
389     unsigned remainingBits = subBitWidth % APINT_BITS_PER_WORD;
390     if (remainingBits != 0) {
391       uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - remainingBits);
392       U.pVal[hi1Word] &= ~mask;
393       U.pVal[hi1Word] |= subBits.getWord(subBitWidth - 1);
394     }
395     return;
396   }
397 
398   // General case - set/clear individual bits in dst based on src.
399   // TODO - there is scope for optimization here, but at the moment this code
400   // path is barely used so prefer readability over performance.
401   for (unsigned i = 0; i != subBitWidth; ++i)
402     setBitVal(bitPosition + i, subBits[i]);
403 }
404 
405 void APInt::insertBits(uint64_t subBits, unsigned bitPosition, unsigned numBits) {
406   uint64_t maskBits = maskTrailingOnes<uint64_t>(numBits);
407   subBits &= maskBits;
408   if (isSingleWord()) {
409     U.VAL &= ~(maskBits << bitPosition);
410     U.VAL |= subBits << bitPosition;
411     return;
412   }
413 
414   unsigned loBit = whichBit(bitPosition);
415   unsigned loWord = whichWord(bitPosition);
416   unsigned hiWord = whichWord(bitPosition + numBits - 1);
417   if (loWord == hiWord) {
418     U.pVal[loWord] &= ~(maskBits << loBit);
419     U.pVal[loWord] |= subBits << loBit;
420     return;
421   }
422 
423   static_assert(8 * sizeof(WordType) <= 64, "This code assumes only two words affected");
424   unsigned wordBits = 8 * sizeof(WordType);
425   U.pVal[loWord] &= ~(maskBits << loBit);
426   U.pVal[loWord] |= subBits << loBit;
427 
428   U.pVal[hiWord] &= ~(maskBits >> (wordBits - loBit));
429   U.pVal[hiWord] |= subBits >> (wordBits - loBit);
430 }
431 
432 APInt APInt::extractBits(unsigned numBits, unsigned bitPosition) const {
433   assert(numBits > 0 && "Can't extract zero bits");
434   assert(bitPosition < BitWidth && (numBits + bitPosition) <= BitWidth &&
435          "Illegal bit extraction");
436 
437   if (isSingleWord())
438     return APInt(numBits, U.VAL >> bitPosition);
439 
440   unsigned loBit = whichBit(bitPosition);
441   unsigned loWord = whichWord(bitPosition);
442   unsigned hiWord = whichWord(bitPosition + numBits - 1);
443 
444   // Single word result extracting bits from a single word source.
445   if (loWord == hiWord)
446     return APInt(numBits, U.pVal[loWord] >> loBit);
447 
448   // Extracting bits that start on a source word boundary can be done
449   // as a fast memory copy.
450   if (loBit == 0)
451     return APInt(numBits, makeArrayRef(U.pVal + loWord, 1 + hiWord - loWord));
452 
453   // General case - shift + copy source words directly into place.
454   APInt Result(numBits, 0);
455   unsigned NumSrcWords = getNumWords();
456   unsigned NumDstWords = Result.getNumWords();
457 
458   uint64_t *DestPtr = Result.isSingleWord() ? &Result.U.VAL : Result.U.pVal;
459   for (unsigned word = 0; word < NumDstWords; ++word) {
460     uint64_t w0 = U.pVal[loWord + word];
461     uint64_t w1 =
462         (loWord + word + 1) < NumSrcWords ? U.pVal[loWord + word + 1] : 0;
463     DestPtr[word] = (w0 >> loBit) | (w1 << (APINT_BITS_PER_WORD - loBit));
464   }
465 
466   return Result.clearUnusedBits();
467 }
468 
469 uint64_t APInt::extractBitsAsZExtValue(unsigned numBits,
470                                        unsigned bitPosition) const {
471   assert(numBits > 0 && "Can't extract zero bits");
472   assert(bitPosition < BitWidth && (numBits + bitPosition) <= BitWidth &&
473          "Illegal bit extraction");
474   assert(numBits <= 64 && "Illegal bit extraction");
475 
476   uint64_t maskBits = maskTrailingOnes<uint64_t>(numBits);
477   if (isSingleWord())
478     return (U.VAL >> bitPosition) & maskBits;
479 
480   unsigned loBit = whichBit(bitPosition);
481   unsigned loWord = whichWord(bitPosition);
482   unsigned hiWord = whichWord(bitPosition + numBits - 1);
483   if (loWord == hiWord)
484     return (U.pVal[loWord] >> loBit) & maskBits;
485 
486   static_assert(8 * sizeof(WordType) <= 64, "This code assumes only two words affected");
487   unsigned wordBits = 8 * sizeof(WordType);
488   uint64_t retBits = U.pVal[loWord] >> loBit;
489   retBits |= U.pVal[hiWord] << (wordBits - loBit);
490   retBits &= maskBits;
491   return retBits;
492 }
493 
494 unsigned APInt::getBitsNeeded(StringRef str, uint8_t radix) {
495   assert(!str.empty() && "Invalid string length");
496   assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 ||
497           radix == 36) &&
498          "Radix should be 2, 8, 10, 16, or 36!");
499 
500   size_t slen = str.size();
501 
502   // Each computation below needs to know if it's negative.
503   StringRef::iterator p = str.begin();
504   unsigned isNegative = *p == '-';
505   if (*p == '-' || *p == '+') {
506     p++;
507     slen--;
508     assert(slen && "String is only a sign, needs a value.");
509   }
510 
511   // For radixes of power-of-two values, the bits required is accurately and
512   // easily computed
513   if (radix == 2)
514     return slen + isNegative;
515   if (radix == 8)
516     return slen * 3 + isNegative;
517   if (radix == 16)
518     return slen * 4 + isNegative;
519 
520   // FIXME: base 36
521 
522   // This is grossly inefficient but accurate. We could probably do something
523   // with a computation of roughly slen*64/20 and then adjust by the value of
524   // the first few digits. But, I'm not sure how accurate that could be.
525 
526   // Compute a sufficient number of bits that is always large enough but might
527   // be too large. This avoids the assertion in the constructor. This
528   // calculation doesn't work appropriately for the numbers 0-9, so just use 4
529   // bits in that case.
530   unsigned sufficient
531     = radix == 10? (slen == 1 ? 4 : slen * 64/18)
532                  : (slen == 1 ? 7 : slen * 16/3);
533 
534   // Convert to the actual binary value.
535   APInt tmp(sufficient, StringRef(p, slen), radix);
536 
537   // Compute how many bits are required. If the log is infinite, assume we need
538   // just bit. If the log is exact and value is negative, then the value is
539   // MinSignedValue with (log + 1) bits.
540   unsigned log = tmp.logBase2();
541   if (log == (unsigned)-1) {
542     return isNegative + 1;
543   } else if (isNegative && tmp.isPowerOf2()) {
544     return isNegative + log;
545   } else {
546     return isNegative + log + 1;
547   }
548 }
549 
550 hash_code llvm::hash_value(const APInt &Arg) {
551   if (Arg.isSingleWord())
552     return hash_combine(Arg.BitWidth, Arg.U.VAL);
553 
554   return hash_combine(
555       Arg.BitWidth,
556       hash_combine_range(Arg.U.pVal, Arg.U.pVal + Arg.getNumWords()));
557 }
558 
559 unsigned DenseMapInfo<APInt>::getHashValue(const APInt &Key) {
560   return static_cast<unsigned>(hash_value(Key));
561 }
562 
563 bool APInt::isSplat(unsigned SplatSizeInBits) const {
564   assert(getBitWidth() % SplatSizeInBits == 0 &&
565          "SplatSizeInBits must divide width!");
566   // We can check that all parts of an integer are equal by making use of a
567   // little trick: rotate and check if it's still the same value.
568   return *this == rotl(SplatSizeInBits);
569 }
570 
571 /// This function returns the high "numBits" bits of this APInt.
572 APInt APInt::getHiBits(unsigned numBits) const {
573   return this->lshr(BitWidth - numBits);
574 }
575 
576 /// This function returns the low "numBits" bits of this APInt.
577 APInt APInt::getLoBits(unsigned numBits) const {
578   APInt Result(getLowBitsSet(BitWidth, numBits));
579   Result &= *this;
580   return Result;
581 }
582 
583 /// Return a value containing V broadcasted over NewLen bits.
584 APInt APInt::getSplat(unsigned NewLen, const APInt &V) {
585   assert(NewLen >= V.getBitWidth() && "Can't splat to smaller bit width!");
586 
587   APInt Val = V.zextOrSelf(NewLen);
588   for (unsigned I = V.getBitWidth(); I < NewLen; I <<= 1)
589     Val |= Val << I;
590 
591   return Val;
592 }
593 
594 unsigned APInt::countLeadingZerosSlowCase() const {
595   unsigned Count = 0;
596   for (int i = getNumWords()-1; i >= 0; --i) {
597     uint64_t V = U.pVal[i];
598     if (V == 0)
599       Count += APINT_BITS_PER_WORD;
600     else {
601       Count += llvm::countLeadingZeros(V);
602       break;
603     }
604   }
605   // Adjust for unused bits in the most significant word (they are zero).
606   unsigned Mod = BitWidth % APINT_BITS_PER_WORD;
607   Count -= Mod > 0 ? APINT_BITS_PER_WORD - Mod : 0;
608   return Count;
609 }
610 
611 unsigned APInt::countLeadingOnesSlowCase() const {
612   unsigned highWordBits = BitWidth % APINT_BITS_PER_WORD;
613   unsigned shift;
614   if (!highWordBits) {
615     highWordBits = APINT_BITS_PER_WORD;
616     shift = 0;
617   } else {
618     shift = APINT_BITS_PER_WORD - highWordBits;
619   }
620   int i = getNumWords() - 1;
621   unsigned Count = llvm::countLeadingOnes(U.pVal[i] << shift);
622   if (Count == highWordBits) {
623     for (i--; i >= 0; --i) {
624       if (U.pVal[i] == WORDTYPE_MAX)
625         Count += APINT_BITS_PER_WORD;
626       else {
627         Count += llvm::countLeadingOnes(U.pVal[i]);
628         break;
629       }
630     }
631   }
632   return Count;
633 }
634 
635 unsigned APInt::countTrailingZerosSlowCase() const {
636   unsigned Count = 0;
637   unsigned i = 0;
638   for (; i < getNumWords() && U.pVal[i] == 0; ++i)
639     Count += APINT_BITS_PER_WORD;
640   if (i < getNumWords())
641     Count += llvm::countTrailingZeros(U.pVal[i]);
642   return std::min(Count, BitWidth);
643 }
644 
645 unsigned APInt::countTrailingOnesSlowCase() const {
646   unsigned Count = 0;
647   unsigned i = 0;
648   for (; i < getNumWords() && U.pVal[i] == WORDTYPE_MAX; ++i)
649     Count += APINT_BITS_PER_WORD;
650   if (i < getNumWords())
651     Count += llvm::countTrailingOnes(U.pVal[i]);
652   assert(Count <= BitWidth);
653   return Count;
654 }
655 
656 unsigned APInt::countPopulationSlowCase() const {
657   unsigned Count = 0;
658   for (unsigned i = 0; i < getNumWords(); ++i)
659     Count += llvm::countPopulation(U.pVal[i]);
660   return Count;
661 }
662 
663 bool APInt::intersectsSlowCase(const APInt &RHS) const {
664   for (unsigned i = 0, e = getNumWords(); i != e; ++i)
665     if ((U.pVal[i] & RHS.U.pVal[i]) != 0)
666       return true;
667 
668   return false;
669 }
670 
671 bool APInt::isSubsetOfSlowCase(const APInt &RHS) const {
672   for (unsigned i = 0, e = getNumWords(); i != e; ++i)
673     if ((U.pVal[i] & ~RHS.U.pVal[i]) != 0)
674       return false;
675 
676   return true;
677 }
678 
679 APInt APInt::byteSwap() const {
680   assert(BitWidth >= 16 && BitWidth % 8 == 0 && "Cannot byteswap!");
681   if (BitWidth == 16)
682     return APInt(BitWidth, ByteSwap_16(uint16_t(U.VAL)));
683   if (BitWidth == 32)
684     return APInt(BitWidth, ByteSwap_32(unsigned(U.VAL)));
685   if (BitWidth <= 64) {
686     uint64_t Tmp1 = ByteSwap_64(U.VAL);
687     Tmp1 >>= (64 - BitWidth);
688     return APInt(BitWidth, Tmp1);
689   }
690 
691   APInt Result(getNumWords() * APINT_BITS_PER_WORD, 0);
692   for (unsigned I = 0, N = getNumWords(); I != N; ++I)
693     Result.U.pVal[I] = ByteSwap_64(U.pVal[N - I - 1]);
694   if (Result.BitWidth != BitWidth) {
695     Result.lshrInPlace(Result.BitWidth - BitWidth);
696     Result.BitWidth = BitWidth;
697   }
698   return Result;
699 }
700 
701 APInt APInt::reverseBits() const {
702   switch (BitWidth) {
703   case 64:
704     return APInt(BitWidth, llvm::reverseBits<uint64_t>(U.VAL));
705   case 32:
706     return APInt(BitWidth, llvm::reverseBits<uint32_t>(U.VAL));
707   case 16:
708     return APInt(BitWidth, llvm::reverseBits<uint16_t>(U.VAL));
709   case 8:
710     return APInt(BitWidth, llvm::reverseBits<uint8_t>(U.VAL));
711   case 0:
712     return *this;
713   default:
714     break;
715   }
716 
717   APInt Val(*this);
718   APInt Reversed(BitWidth, 0);
719   unsigned S = BitWidth;
720 
721   for (; Val != 0; Val.lshrInPlace(1)) {
722     Reversed <<= 1;
723     Reversed |= Val[0];
724     --S;
725   }
726 
727   Reversed <<= S;
728   return Reversed;
729 }
730 
731 APInt llvm::APIntOps::GreatestCommonDivisor(APInt A, APInt B) {
732   // Fast-path a common case.
733   if (A == B) return A;
734 
735   // Corner cases: if either operand is zero, the other is the gcd.
736   if (!A) return B;
737   if (!B) return A;
738 
739   // Count common powers of 2 and remove all other powers of 2.
740   unsigned Pow2;
741   {
742     unsigned Pow2_A = A.countTrailingZeros();
743     unsigned Pow2_B = B.countTrailingZeros();
744     if (Pow2_A > Pow2_B) {
745       A.lshrInPlace(Pow2_A - Pow2_B);
746       Pow2 = Pow2_B;
747     } else if (Pow2_B > Pow2_A) {
748       B.lshrInPlace(Pow2_B - Pow2_A);
749       Pow2 = Pow2_A;
750     } else {
751       Pow2 = Pow2_A;
752     }
753   }
754 
755   // Both operands are odd multiples of 2^Pow_2:
756   //
757   //   gcd(a, b) = gcd(|a - b| / 2^i, min(a, b))
758   //
759   // This is a modified version of Stein's algorithm, taking advantage of
760   // efficient countTrailingZeros().
761   while (A != B) {
762     if (A.ugt(B)) {
763       A -= B;
764       A.lshrInPlace(A.countTrailingZeros() - Pow2);
765     } else {
766       B -= A;
767       B.lshrInPlace(B.countTrailingZeros() - Pow2);
768     }
769   }
770 
771   return A;
772 }
773 
774 APInt llvm::APIntOps::RoundDoubleToAPInt(double Double, unsigned width) {
775   uint64_t I = bit_cast<uint64_t>(Double);
776 
777   // Get the sign bit from the highest order bit
778   bool isNeg = I >> 63;
779 
780   // Get the 11-bit exponent and adjust for the 1023 bit bias
781   int64_t exp = ((I >> 52) & 0x7ff) - 1023;
782 
783   // If the exponent is negative, the value is < 0 so just return 0.
784   if (exp < 0)
785     return APInt(width, 0u);
786 
787   // Extract the mantissa by clearing the top 12 bits (sign + exponent).
788   uint64_t mantissa = (I & (~0ULL >> 12)) | 1ULL << 52;
789 
790   // If the exponent doesn't shift all bits out of the mantissa
791   if (exp < 52)
792     return isNeg ? -APInt(width, mantissa >> (52 - exp)) :
793                     APInt(width, mantissa >> (52 - exp));
794 
795   // If the client didn't provide enough bits for us to shift the mantissa into
796   // then the result is undefined, just return 0
797   if (width <= exp - 52)
798     return APInt(width, 0);
799 
800   // Otherwise, we have to shift the mantissa bits up to the right location
801   APInt Tmp(width, mantissa);
802   Tmp <<= (unsigned)exp - 52;
803   return isNeg ? -Tmp : Tmp;
804 }
805 
806 /// This function converts this APInt to a double.
807 /// The layout for double is as following (IEEE Standard 754):
808 ///  --------------------------------------
809 /// |  Sign    Exponent    Fraction    Bias |
810 /// |-------------------------------------- |
811 /// |  1[63]   11[62-52]   52[51-00]   1023 |
812 ///  --------------------------------------
813 double APInt::roundToDouble(bool isSigned) const {
814 
815   // Handle the simple case where the value is contained in one uint64_t.
816   // It is wrong to optimize getWord(0) to VAL; there might be more than one word.
817   if (isSingleWord() || getActiveBits() <= APINT_BITS_PER_WORD) {
818     if (isSigned) {
819       int64_t sext = SignExtend64(getWord(0), BitWidth);
820       return double(sext);
821     } else
822       return double(getWord(0));
823   }
824 
825   // Determine if the value is negative.
826   bool isNeg = isSigned ? (*this)[BitWidth-1] : false;
827 
828   // Construct the absolute value if we're negative.
829   APInt Tmp(isNeg ? -(*this) : (*this));
830 
831   // Figure out how many bits we're using.
832   unsigned n = Tmp.getActiveBits();
833 
834   // The exponent (without bias normalization) is just the number of bits
835   // we are using. Note that the sign bit is gone since we constructed the
836   // absolute value.
837   uint64_t exp = n;
838 
839   // Return infinity for exponent overflow
840   if (exp > 1023) {
841     if (!isSigned || !isNeg)
842       return std::numeric_limits<double>::infinity();
843     else
844       return -std::numeric_limits<double>::infinity();
845   }
846   exp += 1023; // Increment for 1023 bias
847 
848   // Number of bits in mantissa is 52. To obtain the mantissa value, we must
849   // extract the high 52 bits from the correct words in pVal.
850   uint64_t mantissa;
851   unsigned hiWord = whichWord(n-1);
852   if (hiWord == 0) {
853     mantissa = Tmp.U.pVal[0];
854     if (n > 52)
855       mantissa >>= n - 52; // shift down, we want the top 52 bits.
856   } else {
857     assert(hiWord > 0 && "huh?");
858     uint64_t hibits = Tmp.U.pVal[hiWord] << (52 - n % APINT_BITS_PER_WORD);
859     uint64_t lobits = Tmp.U.pVal[hiWord-1] >> (11 + n % APINT_BITS_PER_WORD);
860     mantissa = hibits | lobits;
861   }
862 
863   // The leading bit of mantissa is implicit, so get rid of it.
864   uint64_t sign = isNeg ? (1ULL << (APINT_BITS_PER_WORD - 1)) : 0;
865   uint64_t I = sign | (exp << 52) | mantissa;
866   return bit_cast<double>(I);
867 }
868 
869 // Truncate to new width.
870 APInt APInt::trunc(unsigned width) const {
871   assert(width < BitWidth && "Invalid APInt Truncate request");
872 
873   if (width <= APINT_BITS_PER_WORD)
874     return APInt(width, getRawData()[0]);
875 
876   APInt Result(getMemory(getNumWords(width)), width);
877 
878   // Copy full words.
879   unsigned i;
880   for (i = 0; i != width / APINT_BITS_PER_WORD; i++)
881     Result.U.pVal[i] = U.pVal[i];
882 
883   // Truncate and copy any partial word.
884   unsigned bits = (0 - width) % APINT_BITS_PER_WORD;
885   if (bits != 0)
886     Result.U.pVal[i] = U.pVal[i] << bits >> bits;
887 
888   return Result;
889 }
890 
891 // Truncate to new width with unsigned saturation.
892 APInt APInt::truncUSat(unsigned width) const {
893   assert(width < BitWidth && "Invalid APInt Truncate request");
894 
895   // Can we just losslessly truncate it?
896   if (isIntN(width))
897     return trunc(width);
898   // If not, then just return the new limit.
899   return APInt::getMaxValue(width);
900 }
901 
902 // Truncate to new width with signed saturation.
903 APInt APInt::truncSSat(unsigned width) const {
904   assert(width < BitWidth && "Invalid APInt Truncate request");
905 
906   // Can we just losslessly truncate it?
907   if (isSignedIntN(width))
908     return trunc(width);
909   // If not, then just return the new limits.
910   return isNegative() ? APInt::getSignedMinValue(width)
911                       : APInt::getSignedMaxValue(width);
912 }
913 
914 // Sign extend to a new width.
915 APInt APInt::sext(unsigned Width) const {
916   assert(Width > BitWidth && "Invalid APInt SignExtend request");
917 
918   if (Width <= APINT_BITS_PER_WORD)
919     return APInt(Width, SignExtend64(U.VAL, BitWidth));
920 
921   APInt Result(getMemory(getNumWords(Width)), Width);
922 
923   // Copy words.
924   std::memcpy(Result.U.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE);
925 
926   // Sign extend the last word since there may be unused bits in the input.
927   Result.U.pVal[getNumWords() - 1] =
928       SignExtend64(Result.U.pVal[getNumWords() - 1],
929                    ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1);
930 
931   // Fill with sign bits.
932   std::memset(Result.U.pVal + getNumWords(), isNegative() ? -1 : 0,
933               (Result.getNumWords() - getNumWords()) * APINT_WORD_SIZE);
934   Result.clearUnusedBits();
935   return Result;
936 }
937 
938 //  Zero extend to a new width.
939 APInt APInt::zext(unsigned width) const {
940   assert(width > BitWidth && "Invalid APInt ZeroExtend request");
941 
942   if (width <= APINT_BITS_PER_WORD)
943     return APInt(width, U.VAL);
944 
945   APInt Result(getMemory(getNumWords(width)), width);
946 
947   // Copy words.
948   std::memcpy(Result.U.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE);
949 
950   // Zero remaining words.
951   std::memset(Result.U.pVal + getNumWords(), 0,
952               (Result.getNumWords() - getNumWords()) * APINT_WORD_SIZE);
953 
954   return Result;
955 }
956 
957 APInt APInt::zextOrTrunc(unsigned width) const {
958   if (BitWidth < width)
959     return zext(width);
960   if (BitWidth > width)
961     return trunc(width);
962   return *this;
963 }
964 
965 APInt APInt::sextOrTrunc(unsigned width) const {
966   if (BitWidth < width)
967     return sext(width);
968   if (BitWidth > width)
969     return trunc(width);
970   return *this;
971 }
972 
973 APInt APInt::truncOrSelf(unsigned width) const {
974   if (BitWidth > width)
975     return trunc(width);
976   return *this;
977 }
978 
979 APInt APInt::zextOrSelf(unsigned width) const {
980   if (BitWidth < width)
981     return zext(width);
982   return *this;
983 }
984 
985 APInt APInt::sextOrSelf(unsigned width) const {
986   if (BitWidth < width)
987     return sext(width);
988   return *this;
989 }
990 
991 /// Arithmetic right-shift this APInt by shiftAmt.
992 /// Arithmetic right-shift function.
993 void APInt::ashrInPlace(const APInt &shiftAmt) {
994   ashrInPlace((unsigned)shiftAmt.getLimitedValue(BitWidth));
995 }
996 
997 /// Arithmetic right-shift this APInt by shiftAmt.
998 /// Arithmetic right-shift function.
999 void APInt::ashrSlowCase(unsigned ShiftAmt) {
1000   // Don't bother performing a no-op shift.
1001   if (!ShiftAmt)
1002     return;
1003 
1004   // Save the original sign bit for later.
1005   bool Negative = isNegative();
1006 
1007   // WordShift is the inter-part shift; BitShift is intra-part shift.
1008   unsigned WordShift = ShiftAmt / APINT_BITS_PER_WORD;
1009   unsigned BitShift = ShiftAmt % APINT_BITS_PER_WORD;
1010 
1011   unsigned WordsToMove = getNumWords() - WordShift;
1012   if (WordsToMove != 0) {
1013     // Sign extend the last word to fill in the unused bits.
1014     U.pVal[getNumWords() - 1] = SignExtend64(
1015         U.pVal[getNumWords() - 1], ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1);
1016 
1017     // Fastpath for moving by whole words.
1018     if (BitShift == 0) {
1019       std::memmove(U.pVal, U.pVal + WordShift, WordsToMove * APINT_WORD_SIZE);
1020     } else {
1021       // Move the words containing significant bits.
1022       for (unsigned i = 0; i != WordsToMove - 1; ++i)
1023         U.pVal[i] = (U.pVal[i + WordShift] >> BitShift) |
1024                     (U.pVal[i + WordShift + 1] << (APINT_BITS_PER_WORD - BitShift));
1025 
1026       // Handle the last word which has no high bits to copy.
1027       U.pVal[WordsToMove - 1] = U.pVal[WordShift + WordsToMove - 1] >> BitShift;
1028       // Sign extend one more time.
1029       U.pVal[WordsToMove - 1] =
1030           SignExtend64(U.pVal[WordsToMove - 1], APINT_BITS_PER_WORD - BitShift);
1031     }
1032   }
1033 
1034   // Fill in the remainder based on the original sign.
1035   std::memset(U.pVal + WordsToMove, Negative ? -1 : 0,
1036               WordShift * APINT_WORD_SIZE);
1037   clearUnusedBits();
1038 }
1039 
1040 /// Logical right-shift this APInt by shiftAmt.
1041 /// Logical right-shift function.
1042 void APInt::lshrInPlace(const APInt &shiftAmt) {
1043   lshrInPlace((unsigned)shiftAmt.getLimitedValue(BitWidth));
1044 }
1045 
1046 /// Logical right-shift this APInt by shiftAmt.
1047 /// Logical right-shift function.
1048 void APInt::lshrSlowCase(unsigned ShiftAmt) {
1049   tcShiftRight(U.pVal, getNumWords(), ShiftAmt);
1050 }
1051 
1052 /// Left-shift this APInt by shiftAmt.
1053 /// Left-shift function.
1054 APInt &APInt::operator<<=(const APInt &shiftAmt) {
1055   // It's undefined behavior in C to shift by BitWidth or greater.
1056   *this <<= (unsigned)shiftAmt.getLimitedValue(BitWidth);
1057   return *this;
1058 }
1059 
1060 void APInt::shlSlowCase(unsigned ShiftAmt) {
1061   tcShiftLeft(U.pVal, getNumWords(), ShiftAmt);
1062   clearUnusedBits();
1063 }
1064 
1065 // Calculate the rotate amount modulo the bit width.
1066 static unsigned rotateModulo(unsigned BitWidth, const APInt &rotateAmt) {
1067   if (BitWidth == 0)
1068     return 0;
1069   unsigned rotBitWidth = rotateAmt.getBitWidth();
1070   APInt rot = rotateAmt;
1071   if (rotBitWidth < BitWidth) {
1072     // Extend the rotate APInt, so that the urem doesn't divide by 0.
1073     // e.g. APInt(1, 32) would give APInt(1, 0).
1074     rot = rotateAmt.zext(BitWidth);
1075   }
1076   rot = rot.urem(APInt(rot.getBitWidth(), BitWidth));
1077   return rot.getLimitedValue(BitWidth);
1078 }
1079 
1080 APInt APInt::rotl(const APInt &rotateAmt) const {
1081   return rotl(rotateModulo(BitWidth, rotateAmt));
1082 }
1083 
1084 APInt APInt::rotl(unsigned rotateAmt) const {
1085   if (BitWidth == 0)
1086     return *this;
1087   rotateAmt %= BitWidth;
1088   if (rotateAmt == 0)
1089     return *this;
1090   return shl(rotateAmt) | lshr(BitWidth - rotateAmt);
1091 }
1092 
1093 APInt APInt::rotr(const APInt &rotateAmt) const {
1094   return rotr(rotateModulo(BitWidth, rotateAmt));
1095 }
1096 
1097 APInt APInt::rotr(unsigned rotateAmt) const {
1098   if (BitWidth == 0)
1099     return *this;
1100   rotateAmt %= BitWidth;
1101   if (rotateAmt == 0)
1102     return *this;
1103   return lshr(rotateAmt) | shl(BitWidth - rotateAmt);
1104 }
1105 
1106 /// \returns the nearest log base 2 of this APInt. Ties round up.
1107 ///
1108 /// NOTE: When we have a BitWidth of 1, we define:
1109 ///
1110 ///   log2(0) = UINT32_MAX
1111 ///   log2(1) = 0
1112 ///
1113 /// to get around any mathematical concerns resulting from
1114 /// referencing 2 in a space where 2 does no exist.
1115 unsigned APInt::nearestLogBase2() const {
1116   // Special case when we have a bitwidth of 1. If VAL is 1, then we
1117   // get 0. If VAL is 0, we get WORDTYPE_MAX which gets truncated to
1118   // UINT32_MAX.
1119   if (BitWidth == 1)
1120     return U.VAL - 1;
1121 
1122   // Handle the zero case.
1123   if (isZero())
1124     return UINT32_MAX;
1125 
1126   // The non-zero case is handled by computing:
1127   //
1128   //   nearestLogBase2(x) = logBase2(x) + x[logBase2(x)-1].
1129   //
1130   // where x[i] is referring to the value of the ith bit of x.
1131   unsigned lg = logBase2();
1132   return lg + unsigned((*this)[lg - 1]);
1133 }
1134 
1135 // Square Root - this method computes and returns the square root of "this".
1136 // Three mechanisms are used for computation. For small values (<= 5 bits),
1137 // a table lookup is done. This gets some performance for common cases. For
1138 // values using less than 52 bits, the value is converted to double and then
1139 // the libc sqrt function is called. The result is rounded and then converted
1140 // back to a uint64_t which is then used to construct the result. Finally,
1141 // the Babylonian method for computing square roots is used.
1142 APInt APInt::sqrt() const {
1143 
1144   // Determine the magnitude of the value.
1145   unsigned magnitude = getActiveBits();
1146 
1147   // Use a fast table for some small values. This also gets rid of some
1148   // rounding errors in libc sqrt for small values.
1149   if (magnitude <= 5) {
1150     static const uint8_t results[32] = {
1151       /*     0 */ 0,
1152       /*  1- 2 */ 1, 1,
1153       /*  3- 6 */ 2, 2, 2, 2,
1154       /*  7-12 */ 3, 3, 3, 3, 3, 3,
1155       /* 13-20 */ 4, 4, 4, 4, 4, 4, 4, 4,
1156       /* 21-30 */ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1157       /*    31 */ 6
1158     };
1159     return APInt(BitWidth, results[ (isSingleWord() ? U.VAL : U.pVal[0]) ]);
1160   }
1161 
1162   // If the magnitude of the value fits in less than 52 bits (the precision of
1163   // an IEEE double precision floating point value), then we can use the
1164   // libc sqrt function which will probably use a hardware sqrt computation.
1165   // This should be faster than the algorithm below.
1166   if (magnitude < 52) {
1167     return APInt(BitWidth,
1168                  uint64_t(::round(::sqrt(double(isSingleWord() ? U.VAL
1169                                                                : U.pVal[0])))));
1170   }
1171 
1172   // Okay, all the short cuts are exhausted. We must compute it. The following
1173   // is a classical Babylonian method for computing the square root. This code
1174   // was adapted to APInt from a wikipedia article on such computations.
1175   // See http://www.wikipedia.org/ and go to the page named
1176   // Calculate_an_integer_square_root.
1177   unsigned nbits = BitWidth, i = 4;
1178   APInt testy(BitWidth, 16);
1179   APInt x_old(BitWidth, 1);
1180   APInt x_new(BitWidth, 0);
1181   APInt two(BitWidth, 2);
1182 
1183   // Select a good starting value using binary logarithms.
1184   for (;; i += 2, testy = testy.shl(2))
1185     if (i >= nbits || this->ule(testy)) {
1186       x_old = x_old.shl(i / 2);
1187       break;
1188     }
1189 
1190   // Use the Babylonian method to arrive at the integer square root:
1191   for (;;) {
1192     x_new = (this->udiv(x_old) + x_old).udiv(two);
1193     if (x_old.ule(x_new))
1194       break;
1195     x_old = x_new;
1196   }
1197 
1198   // Make sure we return the closest approximation
1199   // NOTE: The rounding calculation below is correct. It will produce an
1200   // off-by-one discrepancy with results from pari/gp. That discrepancy has been
1201   // determined to be a rounding issue with pari/gp as it begins to use a
1202   // floating point representation after 192 bits. There are no discrepancies
1203   // between this algorithm and pari/gp for bit widths < 192 bits.
1204   APInt square(x_old * x_old);
1205   APInt nextSquare((x_old + 1) * (x_old +1));
1206   if (this->ult(square))
1207     return x_old;
1208   assert(this->ule(nextSquare) && "Error in APInt::sqrt computation");
1209   APInt midpoint((nextSquare - square).udiv(two));
1210   APInt offset(*this - square);
1211   if (offset.ult(midpoint))
1212     return x_old;
1213   return x_old + 1;
1214 }
1215 
1216 /// Computes the multiplicative inverse of this APInt for a given modulo. The
1217 /// iterative extended Euclidean algorithm is used to solve for this value,
1218 /// however we simplify it to speed up calculating only the inverse, and take
1219 /// advantage of div+rem calculations. We also use some tricks to avoid copying
1220 /// (potentially large) APInts around.
1221 /// WARNING: a value of '0' may be returned,
1222 ///          signifying that no multiplicative inverse exists!
1223 APInt APInt::multiplicativeInverse(const APInt& modulo) const {
1224   assert(ult(modulo) && "This APInt must be smaller than the modulo");
1225 
1226   // Using the properties listed at the following web page (accessed 06/21/08):
1227   //   http://www.numbertheory.org/php/euclid.html
1228   // (especially the properties numbered 3, 4 and 9) it can be proved that
1229   // BitWidth bits suffice for all the computations in the algorithm implemented
1230   // below. More precisely, this number of bits suffice if the multiplicative
1231   // inverse exists, but may not suffice for the general extended Euclidean
1232   // algorithm.
1233 
1234   APInt r[2] = { modulo, *this };
1235   APInt t[2] = { APInt(BitWidth, 0), APInt(BitWidth, 1) };
1236   APInt q(BitWidth, 0);
1237 
1238   unsigned i;
1239   for (i = 0; r[i^1] != 0; i ^= 1) {
1240     // An overview of the math without the confusing bit-flipping:
1241     // q = r[i-2] / r[i-1]
1242     // r[i] = r[i-2] % r[i-1]
1243     // t[i] = t[i-2] - t[i-1] * q
1244     udivrem(r[i], r[i^1], q, r[i]);
1245     t[i] -= t[i^1] * q;
1246   }
1247 
1248   // If this APInt and the modulo are not coprime, there is no multiplicative
1249   // inverse, so return 0. We check this by looking at the next-to-last
1250   // remainder, which is the gcd(*this,modulo) as calculated by the Euclidean
1251   // algorithm.
1252   if (r[i] != 1)
1253     return APInt(BitWidth, 0);
1254 
1255   // The next-to-last t is the multiplicative inverse.  However, we are
1256   // interested in a positive inverse. Calculate a positive one from a negative
1257   // one if necessary. A simple addition of the modulo suffices because
1258   // abs(t[i]) is known to be less than *this/2 (see the link above).
1259   if (t[i].isNegative())
1260     t[i] += modulo;
1261 
1262   return std::move(t[i]);
1263 }
1264 
1265 /// Implementation of Knuth's Algorithm D (Division of nonnegative integers)
1266 /// from "Art of Computer Programming, Volume 2", section 4.3.1, p. 272. The
1267 /// variables here have the same names as in the algorithm. Comments explain
1268 /// the algorithm and any deviation from it.
1269 static void KnuthDiv(uint32_t *u, uint32_t *v, uint32_t *q, uint32_t* r,
1270                      unsigned m, unsigned n) {
1271   assert(u && "Must provide dividend");
1272   assert(v && "Must provide divisor");
1273   assert(q && "Must provide quotient");
1274   assert(u != v && u != q && v != q && "Must use different memory");
1275   assert(n>1 && "n must be > 1");
1276 
1277   // b denotes the base of the number system. In our case b is 2^32.
1278   const uint64_t b = uint64_t(1) << 32;
1279 
1280 // The DEBUG macros here tend to be spam in the debug output if you're not
1281 // debugging this code. Disable them unless KNUTH_DEBUG is defined.
1282 #ifdef KNUTH_DEBUG
1283 #define DEBUG_KNUTH(X) LLVM_DEBUG(X)
1284 #else
1285 #define DEBUG_KNUTH(X) do {} while(false)
1286 #endif
1287 
1288   DEBUG_KNUTH(dbgs() << "KnuthDiv: m=" << m << " n=" << n << '\n');
1289   DEBUG_KNUTH(dbgs() << "KnuthDiv: original:");
1290   DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1291   DEBUG_KNUTH(dbgs() << " by");
1292   DEBUG_KNUTH(for (int i = n; i > 0; i--) dbgs() << " " << v[i - 1]);
1293   DEBUG_KNUTH(dbgs() << '\n');
1294   // D1. [Normalize.] Set d = b / (v[n-1] + 1) and multiply all the digits of
1295   // u and v by d. Note that we have taken Knuth's advice here to use a power
1296   // of 2 value for d such that d * v[n-1] >= b/2 (b is the base). A power of
1297   // 2 allows us to shift instead of multiply and it is easy to determine the
1298   // shift amount from the leading zeros.  We are basically normalizing the u
1299   // and v so that its high bits are shifted to the top of v's range without
1300   // overflow. Note that this can require an extra word in u so that u must
1301   // be of length m+n+1.
1302   unsigned shift = countLeadingZeros(v[n-1]);
1303   uint32_t v_carry = 0;
1304   uint32_t u_carry = 0;
1305   if (shift) {
1306     for (unsigned i = 0; i < m+n; ++i) {
1307       uint32_t u_tmp = u[i] >> (32 - shift);
1308       u[i] = (u[i] << shift) | u_carry;
1309       u_carry = u_tmp;
1310     }
1311     for (unsigned i = 0; i < n; ++i) {
1312       uint32_t v_tmp = v[i] >> (32 - shift);
1313       v[i] = (v[i] << shift) | v_carry;
1314       v_carry = v_tmp;
1315     }
1316   }
1317   u[m+n] = u_carry;
1318 
1319   DEBUG_KNUTH(dbgs() << "KnuthDiv:   normal:");
1320   DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1321   DEBUG_KNUTH(dbgs() << " by");
1322   DEBUG_KNUTH(for (int i = n; i > 0; i--) dbgs() << " " << v[i - 1]);
1323   DEBUG_KNUTH(dbgs() << '\n');
1324 
1325   // D2. [Initialize j.]  Set j to m. This is the loop counter over the places.
1326   int j = m;
1327   do {
1328     DEBUG_KNUTH(dbgs() << "KnuthDiv: quotient digit #" << j << '\n');
1329     // D3. [Calculate q'.].
1330     //     Set qp = (u[j+n]*b + u[j+n-1]) / v[n-1]. (qp=qprime=q')
1331     //     Set rp = (u[j+n]*b + u[j+n-1]) % v[n-1]. (rp=rprime=r')
1332     // Now test if qp == b or qp*v[n-2] > b*rp + u[j+n-2]; if so, decrease
1333     // qp by 1, increase rp by v[n-1], and repeat this test if rp < b. The test
1334     // on v[n-2] determines at high speed most of the cases in which the trial
1335     // value qp is one too large, and it eliminates all cases where qp is two
1336     // too large.
1337     uint64_t dividend = Make_64(u[j+n], u[j+n-1]);
1338     DEBUG_KNUTH(dbgs() << "KnuthDiv: dividend == " << dividend << '\n');
1339     uint64_t qp = dividend / v[n-1];
1340     uint64_t rp = dividend % v[n-1];
1341     if (qp == b || qp*v[n-2] > b*rp + u[j+n-2]) {
1342       qp--;
1343       rp += v[n-1];
1344       if (rp < b && (qp == b || qp*v[n-2] > b*rp + u[j+n-2]))
1345         qp--;
1346     }
1347     DEBUG_KNUTH(dbgs() << "KnuthDiv: qp == " << qp << ", rp == " << rp << '\n');
1348 
1349     // D4. [Multiply and subtract.] Replace (u[j+n]u[j+n-1]...u[j]) with
1350     // (u[j+n]u[j+n-1]..u[j]) - qp * (v[n-1]...v[1]v[0]). This computation
1351     // consists of a simple multiplication by a one-place number, combined with
1352     // a subtraction.
1353     // The digits (u[j+n]...u[j]) should be kept positive; if the result of
1354     // this step is actually negative, (u[j+n]...u[j]) should be left as the
1355     // true value plus b**(n+1), namely as the b's complement of
1356     // the true value, and a "borrow" to the left should be remembered.
1357     int64_t borrow = 0;
1358     for (unsigned i = 0; i < n; ++i) {
1359       uint64_t p = uint64_t(qp) * uint64_t(v[i]);
1360       int64_t subres = int64_t(u[j+i]) - borrow - Lo_32(p);
1361       u[j+i] = Lo_32(subres);
1362       borrow = Hi_32(p) - Hi_32(subres);
1363       DEBUG_KNUTH(dbgs() << "KnuthDiv: u[j+i] = " << u[j + i]
1364                         << ", borrow = " << borrow << '\n');
1365     }
1366     bool isNeg = u[j+n] < borrow;
1367     u[j+n] -= Lo_32(borrow);
1368 
1369     DEBUG_KNUTH(dbgs() << "KnuthDiv: after subtraction:");
1370     DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1371     DEBUG_KNUTH(dbgs() << '\n');
1372 
1373     // D5. [Test remainder.] Set q[j] = qp. If the result of step D4 was
1374     // negative, go to step D6; otherwise go on to step D7.
1375     q[j] = Lo_32(qp);
1376     if (isNeg) {
1377       // D6. [Add back]. The probability that this step is necessary is very
1378       // small, on the order of only 2/b. Make sure that test data accounts for
1379       // this possibility. Decrease q[j] by 1
1380       q[j]--;
1381       // and add (0v[n-1]...v[1]v[0]) to (u[j+n]u[j+n-1]...u[j+1]u[j]).
1382       // A carry will occur to the left of u[j+n], and it should be ignored
1383       // since it cancels with the borrow that occurred in D4.
1384       bool carry = false;
1385       for (unsigned i = 0; i < n; i++) {
1386         uint32_t limit = std::min(u[j+i],v[i]);
1387         u[j+i] += v[i] + carry;
1388         carry = u[j+i] < limit || (carry && u[j+i] == limit);
1389       }
1390       u[j+n] += carry;
1391     }
1392     DEBUG_KNUTH(dbgs() << "KnuthDiv: after correction:");
1393     DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1394     DEBUG_KNUTH(dbgs() << "\nKnuthDiv: digit result = " << q[j] << '\n');
1395 
1396     // D7. [Loop on j.]  Decrease j by one. Now if j >= 0, go back to D3.
1397   } while (--j >= 0);
1398 
1399   DEBUG_KNUTH(dbgs() << "KnuthDiv: quotient:");
1400   DEBUG_KNUTH(for (int i = m; i >= 0; i--) dbgs() << " " << q[i]);
1401   DEBUG_KNUTH(dbgs() << '\n');
1402 
1403   // D8. [Unnormalize]. Now q[...] is the desired quotient, and the desired
1404   // remainder may be obtained by dividing u[...] by d. If r is non-null we
1405   // compute the remainder (urem uses this).
1406   if (r) {
1407     // The value d is expressed by the "shift" value above since we avoided
1408     // multiplication by d by using a shift left. So, all we have to do is
1409     // shift right here.
1410     if (shift) {
1411       uint32_t carry = 0;
1412       DEBUG_KNUTH(dbgs() << "KnuthDiv: remainder:");
1413       for (int i = n-1; i >= 0; i--) {
1414         r[i] = (u[i] >> shift) | carry;
1415         carry = u[i] << (32 - shift);
1416         DEBUG_KNUTH(dbgs() << " " << r[i]);
1417       }
1418     } else {
1419       for (int i = n-1; i >= 0; i--) {
1420         r[i] = u[i];
1421         DEBUG_KNUTH(dbgs() << " " << r[i]);
1422       }
1423     }
1424     DEBUG_KNUTH(dbgs() << '\n');
1425   }
1426   DEBUG_KNUTH(dbgs() << '\n');
1427 }
1428 
1429 void APInt::divide(const WordType *LHS, unsigned lhsWords, const WordType *RHS,
1430                    unsigned rhsWords, WordType *Quotient, WordType *Remainder) {
1431   assert(lhsWords >= rhsWords && "Fractional result");
1432 
1433   // First, compose the values into an array of 32-bit words instead of
1434   // 64-bit words. This is a necessity of both the "short division" algorithm
1435   // and the Knuth "classical algorithm" which requires there to be native
1436   // operations for +, -, and * on an m bit value with an m*2 bit result. We
1437   // can't use 64-bit operands here because we don't have native results of
1438   // 128-bits. Furthermore, casting the 64-bit values to 32-bit values won't
1439   // work on large-endian machines.
1440   unsigned n = rhsWords * 2;
1441   unsigned m = (lhsWords * 2) - n;
1442 
1443   // Allocate space for the temporary values we need either on the stack, if
1444   // it will fit, or on the heap if it won't.
1445   uint32_t SPACE[128];
1446   uint32_t *U = nullptr;
1447   uint32_t *V = nullptr;
1448   uint32_t *Q = nullptr;
1449   uint32_t *R = nullptr;
1450   if ((Remainder?4:3)*n+2*m+1 <= 128) {
1451     U = &SPACE[0];
1452     V = &SPACE[m+n+1];
1453     Q = &SPACE[(m+n+1) + n];
1454     if (Remainder)
1455       R = &SPACE[(m+n+1) + n + (m+n)];
1456   } else {
1457     U = new uint32_t[m + n + 1];
1458     V = new uint32_t[n];
1459     Q = new uint32_t[m+n];
1460     if (Remainder)
1461       R = new uint32_t[n];
1462   }
1463 
1464   // Initialize the dividend
1465   memset(U, 0, (m+n+1)*sizeof(uint32_t));
1466   for (unsigned i = 0; i < lhsWords; ++i) {
1467     uint64_t tmp = LHS[i];
1468     U[i * 2] = Lo_32(tmp);
1469     U[i * 2 + 1] = Hi_32(tmp);
1470   }
1471   U[m+n] = 0; // this extra word is for "spill" in the Knuth algorithm.
1472 
1473   // Initialize the divisor
1474   memset(V, 0, (n)*sizeof(uint32_t));
1475   for (unsigned i = 0; i < rhsWords; ++i) {
1476     uint64_t tmp = RHS[i];
1477     V[i * 2] = Lo_32(tmp);
1478     V[i * 2 + 1] = Hi_32(tmp);
1479   }
1480 
1481   // initialize the quotient and remainder
1482   memset(Q, 0, (m+n) * sizeof(uint32_t));
1483   if (Remainder)
1484     memset(R, 0, n * sizeof(uint32_t));
1485 
1486   // Now, adjust m and n for the Knuth division. n is the number of words in
1487   // the divisor. m is the number of words by which the dividend exceeds the
1488   // divisor (i.e. m+n is the length of the dividend). These sizes must not
1489   // contain any zero words or the Knuth algorithm fails.
1490   for (unsigned i = n; i > 0 && V[i-1] == 0; i--) {
1491     n--;
1492     m++;
1493   }
1494   for (unsigned i = m+n; i > 0 && U[i-1] == 0; i--)
1495     m--;
1496 
1497   // If we're left with only a single word for the divisor, Knuth doesn't work
1498   // so we implement the short division algorithm here. This is much simpler
1499   // and faster because we are certain that we can divide a 64-bit quantity
1500   // by a 32-bit quantity at hardware speed and short division is simply a
1501   // series of such operations. This is just like doing short division but we
1502   // are using base 2^32 instead of base 10.
1503   assert(n != 0 && "Divide by zero?");
1504   if (n == 1) {
1505     uint32_t divisor = V[0];
1506     uint32_t remainder = 0;
1507     for (int i = m; i >= 0; i--) {
1508       uint64_t partial_dividend = Make_64(remainder, U[i]);
1509       if (partial_dividend == 0) {
1510         Q[i] = 0;
1511         remainder = 0;
1512       } else if (partial_dividend < divisor) {
1513         Q[i] = 0;
1514         remainder = Lo_32(partial_dividend);
1515       } else if (partial_dividend == divisor) {
1516         Q[i] = 1;
1517         remainder = 0;
1518       } else {
1519         Q[i] = Lo_32(partial_dividend / divisor);
1520         remainder = Lo_32(partial_dividend - (Q[i] * divisor));
1521       }
1522     }
1523     if (R)
1524       R[0] = remainder;
1525   } else {
1526     // Now we're ready to invoke the Knuth classical divide algorithm. In this
1527     // case n > 1.
1528     KnuthDiv(U, V, Q, R, m, n);
1529   }
1530 
1531   // If the caller wants the quotient
1532   if (Quotient) {
1533     for (unsigned i = 0; i < lhsWords; ++i)
1534       Quotient[i] = Make_64(Q[i*2+1], Q[i*2]);
1535   }
1536 
1537   // If the caller wants the remainder
1538   if (Remainder) {
1539     for (unsigned i = 0; i < rhsWords; ++i)
1540       Remainder[i] = Make_64(R[i*2+1], R[i*2]);
1541   }
1542 
1543   // Clean up the memory we allocated.
1544   if (U != &SPACE[0]) {
1545     delete [] U;
1546     delete [] V;
1547     delete [] Q;
1548     delete [] R;
1549   }
1550 }
1551 
1552 APInt APInt::udiv(const APInt &RHS) const {
1553   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
1554 
1555   // First, deal with the easy case
1556   if (isSingleWord()) {
1557     assert(RHS.U.VAL != 0 && "Divide by zero?");
1558     return APInt(BitWidth, U.VAL / RHS.U.VAL);
1559   }
1560 
1561   // Get some facts about the LHS and RHS number of bits and words
1562   unsigned lhsWords = getNumWords(getActiveBits());
1563   unsigned rhsBits  = RHS.getActiveBits();
1564   unsigned rhsWords = getNumWords(rhsBits);
1565   assert(rhsWords && "Divided by zero???");
1566 
1567   // Deal with some degenerate cases
1568   if (!lhsWords)
1569     // 0 / X ===> 0
1570     return APInt(BitWidth, 0);
1571   if (rhsBits == 1)
1572     // X / 1 ===> X
1573     return *this;
1574   if (lhsWords < rhsWords || this->ult(RHS))
1575     // X / Y ===> 0, iff X < Y
1576     return APInt(BitWidth, 0);
1577   if (*this == RHS)
1578     // X / X ===> 1
1579     return APInt(BitWidth, 1);
1580   if (lhsWords == 1) // rhsWords is 1 if lhsWords is 1.
1581     // All high words are zero, just use native divide
1582     return APInt(BitWidth, this->U.pVal[0] / RHS.U.pVal[0]);
1583 
1584   // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1585   APInt Quotient(BitWidth, 0); // to hold result.
1586   divide(U.pVal, lhsWords, RHS.U.pVal, rhsWords, Quotient.U.pVal, nullptr);
1587   return Quotient;
1588 }
1589 
1590 APInt APInt::udiv(uint64_t RHS) const {
1591   assert(RHS != 0 && "Divide by zero?");
1592 
1593   // First, deal with the easy case
1594   if (isSingleWord())
1595     return APInt(BitWidth, U.VAL / RHS);
1596 
1597   // Get some facts about the LHS words.
1598   unsigned lhsWords = getNumWords(getActiveBits());
1599 
1600   // Deal with some degenerate cases
1601   if (!lhsWords)
1602     // 0 / X ===> 0
1603     return APInt(BitWidth, 0);
1604   if (RHS == 1)
1605     // X / 1 ===> X
1606     return *this;
1607   if (this->ult(RHS))
1608     // X / Y ===> 0, iff X < Y
1609     return APInt(BitWidth, 0);
1610   if (*this == RHS)
1611     // X / X ===> 1
1612     return APInt(BitWidth, 1);
1613   if (lhsWords == 1) // rhsWords is 1 if lhsWords is 1.
1614     // All high words are zero, just use native divide
1615     return APInt(BitWidth, this->U.pVal[0] / RHS);
1616 
1617   // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1618   APInt Quotient(BitWidth, 0); // to hold result.
1619   divide(U.pVal, lhsWords, &RHS, 1, Quotient.U.pVal, nullptr);
1620   return Quotient;
1621 }
1622 
1623 APInt APInt::sdiv(const APInt &RHS) const {
1624   if (isNegative()) {
1625     if (RHS.isNegative())
1626       return (-(*this)).udiv(-RHS);
1627     return -((-(*this)).udiv(RHS));
1628   }
1629   if (RHS.isNegative())
1630     return -(this->udiv(-RHS));
1631   return this->udiv(RHS);
1632 }
1633 
1634 APInt APInt::sdiv(int64_t RHS) const {
1635   if (isNegative()) {
1636     if (RHS < 0)
1637       return (-(*this)).udiv(-RHS);
1638     return -((-(*this)).udiv(RHS));
1639   }
1640   if (RHS < 0)
1641     return -(this->udiv(-RHS));
1642   return this->udiv(RHS);
1643 }
1644 
1645 APInt APInt::urem(const APInt &RHS) const {
1646   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
1647   if (isSingleWord()) {
1648     assert(RHS.U.VAL != 0 && "Remainder by zero?");
1649     return APInt(BitWidth, U.VAL % RHS.U.VAL);
1650   }
1651 
1652   // Get some facts about the LHS
1653   unsigned lhsWords = getNumWords(getActiveBits());
1654 
1655   // Get some facts about the RHS
1656   unsigned rhsBits = RHS.getActiveBits();
1657   unsigned rhsWords = getNumWords(rhsBits);
1658   assert(rhsWords && "Performing remainder operation by zero ???");
1659 
1660   // Check the degenerate cases
1661   if (lhsWords == 0)
1662     // 0 % Y ===> 0
1663     return APInt(BitWidth, 0);
1664   if (rhsBits == 1)
1665     // X % 1 ===> 0
1666     return APInt(BitWidth, 0);
1667   if (lhsWords < rhsWords || this->ult(RHS))
1668     // X % Y ===> X, iff X < Y
1669     return *this;
1670   if (*this == RHS)
1671     // X % X == 0;
1672     return APInt(BitWidth, 0);
1673   if (lhsWords == 1)
1674     // All high words are zero, just use native remainder
1675     return APInt(BitWidth, U.pVal[0] % RHS.U.pVal[0]);
1676 
1677   // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1678   APInt Remainder(BitWidth, 0);
1679   divide(U.pVal, lhsWords, RHS.U.pVal, rhsWords, nullptr, Remainder.U.pVal);
1680   return Remainder;
1681 }
1682 
1683 uint64_t APInt::urem(uint64_t RHS) const {
1684   assert(RHS != 0 && "Remainder by zero?");
1685 
1686   if (isSingleWord())
1687     return U.VAL % RHS;
1688 
1689   // Get some facts about the LHS
1690   unsigned lhsWords = getNumWords(getActiveBits());
1691 
1692   // Check the degenerate cases
1693   if (lhsWords == 0)
1694     // 0 % Y ===> 0
1695     return 0;
1696   if (RHS == 1)
1697     // X % 1 ===> 0
1698     return 0;
1699   if (this->ult(RHS))
1700     // X % Y ===> X, iff X < Y
1701     return getZExtValue();
1702   if (*this == RHS)
1703     // X % X == 0;
1704     return 0;
1705   if (lhsWords == 1)
1706     // All high words are zero, just use native remainder
1707     return U.pVal[0] % RHS;
1708 
1709   // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1710   uint64_t Remainder;
1711   divide(U.pVal, lhsWords, &RHS, 1, nullptr, &Remainder);
1712   return Remainder;
1713 }
1714 
1715 APInt APInt::srem(const APInt &RHS) const {
1716   if (isNegative()) {
1717     if (RHS.isNegative())
1718       return -((-(*this)).urem(-RHS));
1719     return -((-(*this)).urem(RHS));
1720   }
1721   if (RHS.isNegative())
1722     return this->urem(-RHS);
1723   return this->urem(RHS);
1724 }
1725 
1726 int64_t APInt::srem(int64_t RHS) const {
1727   if (isNegative()) {
1728     if (RHS < 0)
1729       return -((-(*this)).urem(-RHS));
1730     return -((-(*this)).urem(RHS));
1731   }
1732   if (RHS < 0)
1733     return this->urem(-RHS);
1734   return this->urem(RHS);
1735 }
1736 
1737 void APInt::udivrem(const APInt &LHS, const APInt &RHS,
1738                     APInt &Quotient, APInt &Remainder) {
1739   assert(LHS.BitWidth == RHS.BitWidth && "Bit widths must be the same");
1740   unsigned BitWidth = LHS.BitWidth;
1741 
1742   // First, deal with the easy case
1743   if (LHS.isSingleWord()) {
1744     assert(RHS.U.VAL != 0 && "Divide by zero?");
1745     uint64_t QuotVal = LHS.U.VAL / RHS.U.VAL;
1746     uint64_t RemVal = LHS.U.VAL % RHS.U.VAL;
1747     Quotient = APInt(BitWidth, QuotVal);
1748     Remainder = APInt(BitWidth, RemVal);
1749     return;
1750   }
1751 
1752   // Get some size facts about the dividend and divisor
1753   unsigned lhsWords = getNumWords(LHS.getActiveBits());
1754   unsigned rhsBits  = RHS.getActiveBits();
1755   unsigned rhsWords = getNumWords(rhsBits);
1756   assert(rhsWords && "Performing divrem operation by zero ???");
1757 
1758   // Check the degenerate cases
1759   if (lhsWords == 0) {
1760     Quotient = APInt(BitWidth, 0);    // 0 / Y ===> 0
1761     Remainder = APInt(BitWidth, 0);   // 0 % Y ===> 0
1762     return;
1763   }
1764 
1765   if (rhsBits == 1) {
1766     Quotient = LHS;                   // X / 1 ===> X
1767     Remainder = APInt(BitWidth, 0);   // X % 1 ===> 0
1768   }
1769 
1770   if (lhsWords < rhsWords || LHS.ult(RHS)) {
1771     Remainder = LHS;                  // X % Y ===> X, iff X < Y
1772     Quotient = APInt(BitWidth, 0);    // X / Y ===> 0, iff X < Y
1773     return;
1774   }
1775 
1776   if (LHS == RHS) {
1777     Quotient  = APInt(BitWidth, 1);   // X / X ===> 1
1778     Remainder = APInt(BitWidth, 0);   // X % X ===> 0;
1779     return;
1780   }
1781 
1782   // Make sure there is enough space to hold the results.
1783   // NOTE: This assumes that reallocate won't affect any bits if it doesn't
1784   // change the size. This is necessary if Quotient or Remainder is aliased
1785   // with LHS or RHS.
1786   Quotient.reallocate(BitWidth);
1787   Remainder.reallocate(BitWidth);
1788 
1789   if (lhsWords == 1) { // rhsWords is 1 if lhsWords is 1.
1790     // There is only one word to consider so use the native versions.
1791     uint64_t lhsValue = LHS.U.pVal[0];
1792     uint64_t rhsValue = RHS.U.pVal[0];
1793     Quotient = lhsValue / rhsValue;
1794     Remainder = lhsValue % rhsValue;
1795     return;
1796   }
1797 
1798   // Okay, lets do it the long way
1799   divide(LHS.U.pVal, lhsWords, RHS.U.pVal, rhsWords, Quotient.U.pVal,
1800          Remainder.U.pVal);
1801   // Clear the rest of the Quotient and Remainder.
1802   std::memset(Quotient.U.pVal + lhsWords, 0,
1803               (getNumWords(BitWidth) - lhsWords) * APINT_WORD_SIZE);
1804   std::memset(Remainder.U.pVal + rhsWords, 0,
1805               (getNumWords(BitWidth) - rhsWords) * APINT_WORD_SIZE);
1806 }
1807 
1808 void APInt::udivrem(const APInt &LHS, uint64_t RHS, APInt &Quotient,
1809                     uint64_t &Remainder) {
1810   assert(RHS != 0 && "Divide by zero?");
1811   unsigned BitWidth = LHS.BitWidth;
1812 
1813   // First, deal with the easy case
1814   if (LHS.isSingleWord()) {
1815     uint64_t QuotVal = LHS.U.VAL / RHS;
1816     Remainder = LHS.U.VAL % RHS;
1817     Quotient = APInt(BitWidth, QuotVal);
1818     return;
1819   }
1820 
1821   // Get some size facts about the dividend and divisor
1822   unsigned lhsWords = getNumWords(LHS.getActiveBits());
1823 
1824   // Check the degenerate cases
1825   if (lhsWords == 0) {
1826     Quotient = APInt(BitWidth, 0);    // 0 / Y ===> 0
1827     Remainder = 0;                    // 0 % Y ===> 0
1828     return;
1829   }
1830 
1831   if (RHS == 1) {
1832     Quotient = LHS;                   // X / 1 ===> X
1833     Remainder = 0;                    // X % 1 ===> 0
1834     return;
1835   }
1836 
1837   if (LHS.ult(RHS)) {
1838     Remainder = LHS.getZExtValue();   // X % Y ===> X, iff X < Y
1839     Quotient = APInt(BitWidth, 0);    // X / Y ===> 0, iff X < Y
1840     return;
1841   }
1842 
1843   if (LHS == RHS) {
1844     Quotient  = APInt(BitWidth, 1);   // X / X ===> 1
1845     Remainder = 0;                    // X % X ===> 0;
1846     return;
1847   }
1848 
1849   // Make sure there is enough space to hold the results.
1850   // NOTE: This assumes that reallocate won't affect any bits if it doesn't
1851   // change the size. This is necessary if Quotient is aliased with LHS.
1852   Quotient.reallocate(BitWidth);
1853 
1854   if (lhsWords == 1) { // rhsWords is 1 if lhsWords is 1.
1855     // There is only one word to consider so use the native versions.
1856     uint64_t lhsValue = LHS.U.pVal[0];
1857     Quotient = lhsValue / RHS;
1858     Remainder = lhsValue % RHS;
1859     return;
1860   }
1861 
1862   // Okay, lets do it the long way
1863   divide(LHS.U.pVal, lhsWords, &RHS, 1, Quotient.U.pVal, &Remainder);
1864   // Clear the rest of the Quotient.
1865   std::memset(Quotient.U.pVal + lhsWords, 0,
1866               (getNumWords(BitWidth) - lhsWords) * APINT_WORD_SIZE);
1867 }
1868 
1869 void APInt::sdivrem(const APInt &LHS, const APInt &RHS,
1870                     APInt &Quotient, APInt &Remainder) {
1871   if (LHS.isNegative()) {
1872     if (RHS.isNegative())
1873       APInt::udivrem(-LHS, -RHS, Quotient, Remainder);
1874     else {
1875       APInt::udivrem(-LHS, RHS, Quotient, Remainder);
1876       Quotient.negate();
1877     }
1878     Remainder.negate();
1879   } else if (RHS.isNegative()) {
1880     APInt::udivrem(LHS, -RHS, Quotient, Remainder);
1881     Quotient.negate();
1882   } else {
1883     APInt::udivrem(LHS, RHS, Quotient, Remainder);
1884   }
1885 }
1886 
1887 void APInt::sdivrem(const APInt &LHS, int64_t RHS,
1888                     APInt &Quotient, int64_t &Remainder) {
1889   uint64_t R = Remainder;
1890   if (LHS.isNegative()) {
1891     if (RHS < 0)
1892       APInt::udivrem(-LHS, -RHS, Quotient, R);
1893     else {
1894       APInt::udivrem(-LHS, RHS, Quotient, R);
1895       Quotient.negate();
1896     }
1897     R = -R;
1898   } else if (RHS < 0) {
1899     APInt::udivrem(LHS, -RHS, Quotient, R);
1900     Quotient.negate();
1901   } else {
1902     APInt::udivrem(LHS, RHS, Quotient, R);
1903   }
1904   Remainder = R;
1905 }
1906 
1907 APInt APInt::sadd_ov(const APInt &RHS, bool &Overflow) const {
1908   APInt Res = *this+RHS;
1909   Overflow = isNonNegative() == RHS.isNonNegative() &&
1910              Res.isNonNegative() != isNonNegative();
1911   return Res;
1912 }
1913 
1914 APInt APInt::uadd_ov(const APInt &RHS, bool &Overflow) const {
1915   APInt Res = *this+RHS;
1916   Overflow = Res.ult(RHS);
1917   return Res;
1918 }
1919 
1920 APInt APInt::ssub_ov(const APInt &RHS, bool &Overflow) const {
1921   APInt Res = *this - RHS;
1922   Overflow = isNonNegative() != RHS.isNonNegative() &&
1923              Res.isNonNegative() != isNonNegative();
1924   return Res;
1925 }
1926 
1927 APInt APInt::usub_ov(const APInt &RHS, bool &Overflow) const {
1928   APInt Res = *this-RHS;
1929   Overflow = Res.ugt(*this);
1930   return Res;
1931 }
1932 
1933 APInt APInt::sdiv_ov(const APInt &RHS, bool &Overflow) const {
1934   // MININT/-1  -->  overflow.
1935   Overflow = isMinSignedValue() && RHS.isAllOnesValue();
1936   return sdiv(RHS);
1937 }
1938 
1939 APInt APInt::smul_ov(const APInt &RHS, bool &Overflow) const {
1940   APInt Res = *this * RHS;
1941 
1942   if (*this != 0 && RHS != 0)
1943     Overflow = Res.sdiv(RHS) != *this || Res.sdiv(*this) != RHS;
1944   else
1945     Overflow = false;
1946   return Res;
1947 }
1948 
1949 APInt APInt::umul_ov(const APInt &RHS, bool &Overflow) const {
1950   if (countLeadingZeros() + RHS.countLeadingZeros() + 2 <= BitWidth) {
1951     Overflow = true;
1952     return *this * RHS;
1953   }
1954 
1955   APInt Res = lshr(1) * RHS;
1956   Overflow = Res.isNegative();
1957   Res <<= 1;
1958   if ((*this)[0]) {
1959     Res += RHS;
1960     if (Res.ult(RHS))
1961       Overflow = true;
1962   }
1963   return Res;
1964 }
1965 
1966 APInt APInt::sshl_ov(const APInt &ShAmt, bool &Overflow) const {
1967   Overflow = ShAmt.uge(getBitWidth());
1968   if (Overflow)
1969     return APInt(BitWidth, 0);
1970 
1971   if (isNonNegative()) // Don't allow sign change.
1972     Overflow = ShAmt.uge(countLeadingZeros());
1973   else
1974     Overflow = ShAmt.uge(countLeadingOnes());
1975 
1976   return *this << ShAmt;
1977 }
1978 
1979 APInt APInt::ushl_ov(const APInt &ShAmt, bool &Overflow) const {
1980   Overflow = ShAmt.uge(getBitWidth());
1981   if (Overflow)
1982     return APInt(BitWidth, 0);
1983 
1984   Overflow = ShAmt.ugt(countLeadingZeros());
1985 
1986   return *this << ShAmt;
1987 }
1988 
1989 APInt APInt::sadd_sat(const APInt &RHS) const {
1990   bool Overflow;
1991   APInt Res = sadd_ov(RHS, Overflow);
1992   if (!Overflow)
1993     return Res;
1994 
1995   return isNegative() ? APInt::getSignedMinValue(BitWidth)
1996                       : APInt::getSignedMaxValue(BitWidth);
1997 }
1998 
1999 APInt APInt::uadd_sat(const APInt &RHS) const {
2000   bool Overflow;
2001   APInt Res = uadd_ov(RHS, Overflow);
2002   if (!Overflow)
2003     return Res;
2004 
2005   return APInt::getMaxValue(BitWidth);
2006 }
2007 
2008 APInt APInt::ssub_sat(const APInt &RHS) const {
2009   bool Overflow;
2010   APInt Res = ssub_ov(RHS, Overflow);
2011   if (!Overflow)
2012     return Res;
2013 
2014   return isNegative() ? APInt::getSignedMinValue(BitWidth)
2015                       : APInt::getSignedMaxValue(BitWidth);
2016 }
2017 
2018 APInt APInt::usub_sat(const APInt &RHS) const {
2019   bool Overflow;
2020   APInt Res = usub_ov(RHS, Overflow);
2021   if (!Overflow)
2022     return Res;
2023 
2024   return APInt(BitWidth, 0);
2025 }
2026 
2027 APInt APInt::smul_sat(const APInt &RHS) const {
2028   bool Overflow;
2029   APInt Res = smul_ov(RHS, Overflow);
2030   if (!Overflow)
2031     return Res;
2032 
2033   // The result is negative if one and only one of inputs is negative.
2034   bool ResIsNegative = isNegative() ^ RHS.isNegative();
2035 
2036   return ResIsNegative ? APInt::getSignedMinValue(BitWidth)
2037                        : APInt::getSignedMaxValue(BitWidth);
2038 }
2039 
2040 APInt APInt::umul_sat(const APInt &RHS) const {
2041   bool Overflow;
2042   APInt Res = umul_ov(RHS, Overflow);
2043   if (!Overflow)
2044     return Res;
2045 
2046   return APInt::getMaxValue(BitWidth);
2047 }
2048 
2049 APInt APInt::sshl_sat(const APInt &RHS) const {
2050   bool Overflow;
2051   APInt Res = sshl_ov(RHS, Overflow);
2052   if (!Overflow)
2053     return Res;
2054 
2055   return isNegative() ? APInt::getSignedMinValue(BitWidth)
2056                       : APInt::getSignedMaxValue(BitWidth);
2057 }
2058 
2059 APInt APInt::ushl_sat(const APInt &RHS) const {
2060   bool Overflow;
2061   APInt Res = ushl_ov(RHS, Overflow);
2062   if (!Overflow)
2063     return Res;
2064 
2065   return APInt::getMaxValue(BitWidth);
2066 }
2067 
2068 void APInt::fromString(unsigned numbits, StringRef str, uint8_t radix) {
2069   // Check our assumptions here
2070   assert(!str.empty() && "Invalid string length");
2071   assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 ||
2072           radix == 36) &&
2073          "Radix should be 2, 8, 10, 16, or 36!");
2074 
2075   StringRef::iterator p = str.begin();
2076   size_t slen = str.size();
2077   bool isNeg = *p == '-';
2078   if (*p == '-' || *p == '+') {
2079     p++;
2080     slen--;
2081     assert(slen && "String is only a sign, needs a value.");
2082   }
2083   assert((slen <= numbits || radix != 2) && "Insufficient bit width");
2084   assert(((slen-1)*3 <= numbits || radix != 8) && "Insufficient bit width");
2085   assert(((slen-1)*4 <= numbits || radix != 16) && "Insufficient bit width");
2086   assert((((slen-1)*64)/22 <= numbits || radix != 10) &&
2087          "Insufficient bit width");
2088 
2089   // Allocate memory if needed
2090   if (isSingleWord())
2091     U.VAL = 0;
2092   else
2093     U.pVal = getClearedMemory(getNumWords());
2094 
2095   // Figure out if we can shift instead of multiply
2096   unsigned shift = (radix == 16 ? 4 : radix == 8 ? 3 : radix == 2 ? 1 : 0);
2097 
2098   // Enter digit traversal loop
2099   for (StringRef::iterator e = str.end(); p != e; ++p) {
2100     unsigned digit = getDigit(*p, radix);
2101     assert(digit < radix && "Invalid character in digit string");
2102 
2103     // Shift or multiply the value by the radix
2104     if (slen > 1) {
2105       if (shift)
2106         *this <<= shift;
2107       else
2108         *this *= radix;
2109     }
2110 
2111     // Add in the digit we just interpreted
2112     *this += digit;
2113   }
2114   // If its negative, put it in two's complement form
2115   if (isNeg)
2116     this->negate();
2117 }
2118 
2119 void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix,
2120                      bool Signed, bool formatAsCLiteral) const {
2121   assert((Radix == 10 || Radix == 8 || Radix == 16 || Radix == 2 ||
2122           Radix == 36) &&
2123          "Radix should be 2, 8, 10, 16, or 36!");
2124 
2125   const char *Prefix = "";
2126   if (formatAsCLiteral) {
2127     switch (Radix) {
2128       case 2:
2129         // Binary literals are a non-standard extension added in gcc 4.3:
2130         // http://gcc.gnu.org/onlinedocs/gcc-4.3.0/gcc/Binary-constants.html
2131         Prefix = "0b";
2132         break;
2133       case 8:
2134         Prefix = "0";
2135         break;
2136       case 10:
2137         break; // No prefix
2138       case 16:
2139         Prefix = "0x";
2140         break;
2141       default:
2142         llvm_unreachable("Invalid radix!");
2143     }
2144   }
2145 
2146   // First, check for a zero value and just short circuit the logic below.
2147   if (isZero()) {
2148     while (*Prefix) {
2149       Str.push_back(*Prefix);
2150       ++Prefix;
2151     };
2152     Str.push_back('0');
2153     return;
2154   }
2155 
2156   static const char Digits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
2157 
2158   if (isSingleWord()) {
2159     char Buffer[65];
2160     char *BufPtr = std::end(Buffer);
2161 
2162     uint64_t N;
2163     if (!Signed) {
2164       N = getZExtValue();
2165     } else {
2166       int64_t I = getSExtValue();
2167       if (I >= 0) {
2168         N = I;
2169       } else {
2170         Str.push_back('-');
2171         N = -(uint64_t)I;
2172       }
2173     }
2174 
2175     while (*Prefix) {
2176       Str.push_back(*Prefix);
2177       ++Prefix;
2178     };
2179 
2180     while (N) {
2181       *--BufPtr = Digits[N % Radix];
2182       N /= Radix;
2183     }
2184     Str.append(BufPtr, std::end(Buffer));
2185     return;
2186   }
2187 
2188   APInt Tmp(*this);
2189 
2190   if (Signed && isNegative()) {
2191     // They want to print the signed version and it is a negative value
2192     // Flip the bits and add one to turn it into the equivalent positive
2193     // value and put a '-' in the result.
2194     Tmp.negate();
2195     Str.push_back('-');
2196   }
2197 
2198   while (*Prefix) {
2199     Str.push_back(*Prefix);
2200     ++Prefix;
2201   };
2202 
2203   // We insert the digits backward, then reverse them to get the right order.
2204   unsigned StartDig = Str.size();
2205 
2206   // For the 2, 8 and 16 bit cases, we can just shift instead of divide
2207   // because the number of bits per digit (1, 3 and 4 respectively) divides
2208   // equally.  We just shift until the value is zero.
2209   if (Radix == 2 || Radix == 8 || Radix == 16) {
2210     // Just shift tmp right for each digit width until it becomes zero
2211     unsigned ShiftAmt = (Radix == 16 ? 4 : (Radix == 8 ? 3 : 1));
2212     unsigned MaskAmt = Radix - 1;
2213 
2214     while (Tmp.getBoolValue()) {
2215       unsigned Digit = unsigned(Tmp.getRawData()[0]) & MaskAmt;
2216       Str.push_back(Digits[Digit]);
2217       Tmp.lshrInPlace(ShiftAmt);
2218     }
2219   } else {
2220     while (Tmp.getBoolValue()) {
2221       uint64_t Digit;
2222       udivrem(Tmp, Radix, Tmp, Digit);
2223       assert(Digit < Radix && "divide failed");
2224       Str.push_back(Digits[Digit]);
2225     }
2226   }
2227 
2228   // Reverse the digits before returning.
2229   std::reverse(Str.begin()+StartDig, Str.end());
2230 }
2231 
2232 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2233 LLVM_DUMP_METHOD void APInt::dump() const {
2234   SmallString<40> S, U;
2235   this->toStringUnsigned(U);
2236   this->toStringSigned(S);
2237   dbgs() << "APInt(" << BitWidth << "b, "
2238          << U << "u " << S << "s)\n";
2239 }
2240 #endif
2241 
2242 void APInt::print(raw_ostream &OS, bool isSigned) const {
2243   SmallString<40> S;
2244   this->toString(S, 10, isSigned, /* formatAsCLiteral = */false);
2245   OS << S;
2246 }
2247 
2248 // This implements a variety of operations on a representation of
2249 // arbitrary precision, two's-complement, bignum integer values.
2250 
2251 // Assumed by lowHalf, highHalf, partMSB and partLSB.  A fairly safe
2252 // and unrestricting assumption.
2253 static_assert(APInt::APINT_BITS_PER_WORD % 2 == 0,
2254               "Part width must be divisible by 2!");
2255 
2256 // Returns the integer part with the least significant BITS set.
2257 // BITS cannot be zero.
2258 static inline APInt::WordType lowBitMask(unsigned bits) {
2259   assert(bits != 0 && bits <= APInt::APINT_BITS_PER_WORD);
2260   return ~(APInt::WordType) 0 >> (APInt::APINT_BITS_PER_WORD - bits);
2261 }
2262 
2263 /// Returns the value of the lower half of PART.
2264 static inline APInt::WordType lowHalf(APInt::WordType part) {
2265   return part & lowBitMask(APInt::APINT_BITS_PER_WORD / 2);
2266 }
2267 
2268 /// Returns the value of the upper half of PART.
2269 static inline APInt::WordType highHalf(APInt::WordType part) {
2270   return part >> (APInt::APINT_BITS_PER_WORD / 2);
2271 }
2272 
2273 /// Returns the bit number of the most significant set bit of a part.
2274 /// If the input number has no bits set -1U is returned.
2275 static unsigned partMSB(APInt::WordType value) {
2276   return findLastSet(value, ZB_Max);
2277 }
2278 
2279 /// Returns the bit number of the least significant set bit of a part.  If the
2280 /// input number has no bits set -1U is returned.
2281 static unsigned partLSB(APInt::WordType value) {
2282   return findFirstSet(value, ZB_Max);
2283 }
2284 
2285 /// Sets the least significant part of a bignum to the input value, and zeroes
2286 /// out higher parts.
2287 void APInt::tcSet(WordType *dst, WordType part, unsigned parts) {
2288   assert(parts > 0);
2289   dst[0] = part;
2290   for (unsigned i = 1; i < parts; i++)
2291     dst[i] = 0;
2292 }
2293 
2294 /// Assign one bignum to another.
2295 void APInt::tcAssign(WordType *dst, const WordType *src, unsigned parts) {
2296   for (unsigned i = 0; i < parts; i++)
2297     dst[i] = src[i];
2298 }
2299 
2300 /// Returns true if a bignum is zero, false otherwise.
2301 bool APInt::tcIsZero(const WordType *src, unsigned parts) {
2302   for (unsigned i = 0; i < parts; i++)
2303     if (src[i])
2304       return false;
2305 
2306   return true;
2307 }
2308 
2309 /// Extract the given bit of a bignum; returns 0 or 1.
2310 int APInt::tcExtractBit(const WordType *parts, unsigned bit) {
2311   return (parts[whichWord(bit)] & maskBit(bit)) != 0;
2312 }
2313 
2314 /// Set the given bit of a bignum.
2315 void APInt::tcSetBit(WordType *parts, unsigned bit) {
2316   parts[whichWord(bit)] |= maskBit(bit);
2317 }
2318 
2319 /// Clears the given bit of a bignum.
2320 void APInt::tcClearBit(WordType *parts, unsigned bit) {
2321   parts[whichWord(bit)] &= ~maskBit(bit);
2322 }
2323 
2324 /// Returns the bit number of the least significant set bit of a number.  If the
2325 /// input number has no bits set -1U is returned.
2326 unsigned APInt::tcLSB(const WordType *parts, unsigned n) {
2327   for (unsigned i = 0; i < n; i++) {
2328     if (parts[i] != 0) {
2329       unsigned lsb = partLSB(parts[i]);
2330       return lsb + i * APINT_BITS_PER_WORD;
2331     }
2332   }
2333 
2334   return -1U;
2335 }
2336 
2337 /// Returns the bit number of the most significant set bit of a number.
2338 /// If the input number has no bits set -1U is returned.
2339 unsigned APInt::tcMSB(const WordType *parts, unsigned n) {
2340   do {
2341     --n;
2342 
2343     if (parts[n] != 0) {
2344       unsigned msb = partMSB(parts[n]);
2345 
2346       return msb + n * APINT_BITS_PER_WORD;
2347     }
2348   } while (n);
2349 
2350   return -1U;
2351 }
2352 
2353 /// Copy the bit vector of width srcBITS from SRC, starting at bit srcLSB, to
2354 /// DST, of dstCOUNT parts, such that the bit srcLSB becomes the least
2355 /// significant bit of DST.  All high bits above srcBITS in DST are zero-filled.
2356 /// */
2357 void
2358 APInt::tcExtract(WordType *dst, unsigned dstCount, const WordType *src,
2359                  unsigned srcBits, unsigned srcLSB) {
2360   unsigned dstParts = (srcBits + APINT_BITS_PER_WORD - 1) / APINT_BITS_PER_WORD;
2361   assert(dstParts <= dstCount);
2362 
2363   unsigned firstSrcPart = srcLSB / APINT_BITS_PER_WORD;
2364   tcAssign(dst, src + firstSrcPart, dstParts);
2365 
2366   unsigned shift = srcLSB % APINT_BITS_PER_WORD;
2367   tcShiftRight(dst, dstParts, shift);
2368 
2369   // We now have (dstParts * APINT_BITS_PER_WORD - shift) bits from SRC
2370   // in DST.  If this is less that srcBits, append the rest, else
2371   // clear the high bits.
2372   unsigned n = dstParts * APINT_BITS_PER_WORD - shift;
2373   if (n < srcBits) {
2374     WordType mask = lowBitMask (srcBits - n);
2375     dst[dstParts - 1] |= ((src[firstSrcPart + dstParts] & mask)
2376                           << n % APINT_BITS_PER_WORD);
2377   } else if (n > srcBits) {
2378     if (srcBits % APINT_BITS_PER_WORD)
2379       dst[dstParts - 1] &= lowBitMask (srcBits % APINT_BITS_PER_WORD);
2380   }
2381 
2382   // Clear high parts.
2383   while (dstParts < dstCount)
2384     dst[dstParts++] = 0;
2385 }
2386 
2387 //// DST += RHS + C where C is zero or one.  Returns the carry flag.
2388 APInt::WordType APInt::tcAdd(WordType *dst, const WordType *rhs,
2389                              WordType c, unsigned parts) {
2390   assert(c <= 1);
2391 
2392   for (unsigned i = 0; i < parts; i++) {
2393     WordType l = dst[i];
2394     if (c) {
2395       dst[i] += rhs[i] + 1;
2396       c = (dst[i] <= l);
2397     } else {
2398       dst[i] += rhs[i];
2399       c = (dst[i] < l);
2400     }
2401   }
2402 
2403   return c;
2404 }
2405 
2406 /// This function adds a single "word" integer, src, to the multiple
2407 /// "word" integer array, dst[]. dst[] is modified to reflect the addition and
2408 /// 1 is returned if there is a carry out, otherwise 0 is returned.
2409 /// @returns the carry of the addition.
2410 APInt::WordType APInt::tcAddPart(WordType *dst, WordType src,
2411                                  unsigned parts) {
2412   for (unsigned i = 0; i < parts; ++i) {
2413     dst[i] += src;
2414     if (dst[i] >= src)
2415       return 0; // No need to carry so exit early.
2416     src = 1; // Carry one to next digit.
2417   }
2418 
2419   return 1;
2420 }
2421 
2422 /// DST -= RHS + C where C is zero or one.  Returns the carry flag.
2423 APInt::WordType APInt::tcSubtract(WordType *dst, const WordType *rhs,
2424                                   WordType c, unsigned parts) {
2425   assert(c <= 1);
2426 
2427   for (unsigned i = 0; i < parts; i++) {
2428     WordType l = dst[i];
2429     if (c) {
2430       dst[i] -= rhs[i] + 1;
2431       c = (dst[i] >= l);
2432     } else {
2433       dst[i] -= rhs[i];
2434       c = (dst[i] > l);
2435     }
2436   }
2437 
2438   return c;
2439 }
2440 
2441 /// This function subtracts a single "word" (64-bit word), src, from
2442 /// the multi-word integer array, dst[], propagating the borrowed 1 value until
2443 /// no further borrowing is needed or it runs out of "words" in dst.  The result
2444 /// is 1 if "borrowing" exhausted the digits in dst, or 0 if dst was not
2445 /// exhausted. In other words, if src > dst then this function returns 1,
2446 /// otherwise 0.
2447 /// @returns the borrow out of the subtraction
2448 APInt::WordType APInt::tcSubtractPart(WordType *dst, WordType src,
2449                                       unsigned parts) {
2450   for (unsigned i = 0; i < parts; ++i) {
2451     WordType Dst = dst[i];
2452     dst[i] -= src;
2453     if (src <= Dst)
2454       return 0; // No need to borrow so exit early.
2455     src = 1; // We have to "borrow 1" from next "word"
2456   }
2457 
2458   return 1;
2459 }
2460 
2461 /// Negate a bignum in-place.
2462 void APInt::tcNegate(WordType *dst, unsigned parts) {
2463   tcComplement(dst, parts);
2464   tcIncrement(dst, parts);
2465 }
2466 
2467 /// DST += SRC * MULTIPLIER + CARRY   if add is true
2468 /// DST  = SRC * MULTIPLIER + CARRY   if add is false
2469 /// Requires 0 <= DSTPARTS <= SRCPARTS + 1.  If DST overlaps SRC
2470 /// they must start at the same point, i.e. DST == SRC.
2471 /// If DSTPARTS == SRCPARTS + 1 no overflow occurs and zero is
2472 /// returned.  Otherwise DST is filled with the least significant
2473 /// DSTPARTS parts of the result, and if all of the omitted higher
2474 /// parts were zero return zero, otherwise overflow occurred and
2475 /// return one.
2476 int APInt::tcMultiplyPart(WordType *dst, const WordType *src,
2477                           WordType multiplier, WordType carry,
2478                           unsigned srcParts, unsigned dstParts,
2479                           bool add) {
2480   // Otherwise our writes of DST kill our later reads of SRC.
2481   assert(dst <= src || dst >= src + srcParts);
2482   assert(dstParts <= srcParts + 1);
2483 
2484   // N loops; minimum of dstParts and srcParts.
2485   unsigned n = std::min(dstParts, srcParts);
2486 
2487   for (unsigned i = 0; i < n; i++) {
2488     // [LOW, HIGH] = MULTIPLIER * SRC[i] + DST[i] + CARRY.
2489     // This cannot overflow, because:
2490     //   (n - 1) * (n - 1) + 2 (n - 1) = (n - 1) * (n + 1)
2491     // which is less than n^2.
2492     WordType srcPart = src[i];
2493     WordType low, mid, high;
2494     if (multiplier == 0 || srcPart == 0) {
2495       low = carry;
2496       high = 0;
2497     } else {
2498       low = lowHalf(srcPart) * lowHalf(multiplier);
2499       high = highHalf(srcPart) * highHalf(multiplier);
2500 
2501       mid = lowHalf(srcPart) * highHalf(multiplier);
2502       high += highHalf(mid);
2503       mid <<= APINT_BITS_PER_WORD / 2;
2504       if (low + mid < low)
2505         high++;
2506       low += mid;
2507 
2508       mid = highHalf(srcPart) * lowHalf(multiplier);
2509       high += highHalf(mid);
2510       mid <<= APINT_BITS_PER_WORD / 2;
2511       if (low + mid < low)
2512         high++;
2513       low += mid;
2514 
2515       // Now add carry.
2516       if (low + carry < low)
2517         high++;
2518       low += carry;
2519     }
2520 
2521     if (add) {
2522       // And now DST[i], and store the new low part there.
2523       if (low + dst[i] < low)
2524         high++;
2525       dst[i] += low;
2526     } else
2527       dst[i] = low;
2528 
2529     carry = high;
2530   }
2531 
2532   if (srcParts < dstParts) {
2533     // Full multiplication, there is no overflow.
2534     assert(srcParts + 1 == dstParts);
2535     dst[srcParts] = carry;
2536     return 0;
2537   }
2538 
2539   // We overflowed if there is carry.
2540   if (carry)
2541     return 1;
2542 
2543   // We would overflow if any significant unwritten parts would be
2544   // non-zero.  This is true if any remaining src parts are non-zero
2545   // and the multiplier is non-zero.
2546   if (multiplier)
2547     for (unsigned i = dstParts; i < srcParts; i++)
2548       if (src[i])
2549         return 1;
2550 
2551   // We fitted in the narrow destination.
2552   return 0;
2553 }
2554 
2555 /// DST = LHS * RHS, where DST has the same width as the operands and
2556 /// is filled with the least significant parts of the result.  Returns
2557 /// one if overflow occurred, otherwise zero.  DST must be disjoint
2558 /// from both operands.
2559 int APInt::tcMultiply(WordType *dst, const WordType *lhs,
2560                       const WordType *rhs, unsigned parts) {
2561   assert(dst != lhs && dst != rhs);
2562 
2563   int overflow = 0;
2564   tcSet(dst, 0, parts);
2565 
2566   for (unsigned i = 0; i < parts; i++)
2567     overflow |= tcMultiplyPart(&dst[i], lhs, rhs[i], 0, parts,
2568                                parts - i, true);
2569 
2570   return overflow;
2571 }
2572 
2573 /// DST = LHS * RHS, where DST has width the sum of the widths of the
2574 /// operands. No overflow occurs. DST must be disjoint from both operands.
2575 void APInt::tcFullMultiply(WordType *dst, const WordType *lhs,
2576                            const WordType *rhs, unsigned lhsParts,
2577                            unsigned rhsParts) {
2578   // Put the narrower number on the LHS for less loops below.
2579   if (lhsParts > rhsParts)
2580     return tcFullMultiply (dst, rhs, lhs, rhsParts, lhsParts);
2581 
2582   assert(dst != lhs && dst != rhs);
2583 
2584   tcSet(dst, 0, rhsParts);
2585 
2586   for (unsigned i = 0; i < lhsParts; i++)
2587     tcMultiplyPart(&dst[i], rhs, lhs[i], 0, rhsParts, rhsParts + 1, true);
2588 }
2589 
2590 // If RHS is zero LHS and REMAINDER are left unchanged, return one.
2591 // Otherwise set LHS to LHS / RHS with the fractional part discarded,
2592 // set REMAINDER to the remainder, return zero.  i.e.
2593 //
2594 //   OLD_LHS = RHS * LHS + REMAINDER
2595 //
2596 // SCRATCH is a bignum of the same size as the operands and result for
2597 // use by the routine; its contents need not be initialized and are
2598 // destroyed.  LHS, REMAINDER and SCRATCH must be distinct.
2599 int APInt::tcDivide(WordType *lhs, const WordType *rhs,
2600                     WordType *remainder, WordType *srhs,
2601                     unsigned parts) {
2602   assert(lhs != remainder && lhs != srhs && remainder != srhs);
2603 
2604   unsigned shiftCount = tcMSB(rhs, parts) + 1;
2605   if (shiftCount == 0)
2606     return true;
2607 
2608   shiftCount = parts * APINT_BITS_PER_WORD - shiftCount;
2609   unsigned n = shiftCount / APINT_BITS_PER_WORD;
2610   WordType mask = (WordType) 1 << (shiftCount % APINT_BITS_PER_WORD);
2611 
2612   tcAssign(srhs, rhs, parts);
2613   tcShiftLeft(srhs, parts, shiftCount);
2614   tcAssign(remainder, lhs, parts);
2615   tcSet(lhs, 0, parts);
2616 
2617   // Loop, subtracting SRHS if REMAINDER is greater and adding that to the
2618   // total.
2619   for (;;) {
2620     int compare = tcCompare(remainder, srhs, parts);
2621     if (compare >= 0) {
2622       tcSubtract(remainder, srhs, 0, parts);
2623       lhs[n] |= mask;
2624     }
2625 
2626     if (shiftCount == 0)
2627       break;
2628     shiftCount--;
2629     tcShiftRight(srhs, parts, 1);
2630     if ((mask >>= 1) == 0) {
2631       mask = (WordType) 1 << (APINT_BITS_PER_WORD - 1);
2632       n--;
2633     }
2634   }
2635 
2636   return false;
2637 }
2638 
2639 /// Shift a bignum left Cound bits in-place. Shifted in bits are zero. There are
2640 /// no restrictions on Count.
2641 void APInt::tcShiftLeft(WordType *Dst, unsigned Words, unsigned Count) {
2642   // Don't bother performing a no-op shift.
2643   if (!Count)
2644     return;
2645 
2646   // WordShift is the inter-part shift; BitShift is the intra-part shift.
2647   unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words);
2648   unsigned BitShift = Count % APINT_BITS_PER_WORD;
2649 
2650   // Fastpath for moving by whole words.
2651   if (BitShift == 0) {
2652     std::memmove(Dst + WordShift, Dst, (Words - WordShift) * APINT_WORD_SIZE);
2653   } else {
2654     while (Words-- > WordShift) {
2655       Dst[Words] = Dst[Words - WordShift] << BitShift;
2656       if (Words > WordShift)
2657         Dst[Words] |=
2658           Dst[Words - WordShift - 1] >> (APINT_BITS_PER_WORD - BitShift);
2659     }
2660   }
2661 
2662   // Fill in the remainder with 0s.
2663   std::memset(Dst, 0, WordShift * APINT_WORD_SIZE);
2664 }
2665 
2666 /// Shift a bignum right Count bits in-place. Shifted in bits are zero. There
2667 /// are no restrictions on Count.
2668 void APInt::tcShiftRight(WordType *Dst, unsigned Words, unsigned Count) {
2669   // Don't bother performing a no-op shift.
2670   if (!Count)
2671     return;
2672 
2673   // WordShift is the inter-part shift; BitShift is the intra-part shift.
2674   unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words);
2675   unsigned BitShift = Count % APINT_BITS_PER_WORD;
2676 
2677   unsigned WordsToMove = Words - WordShift;
2678   // Fastpath for moving by whole words.
2679   if (BitShift == 0) {
2680     std::memmove(Dst, Dst + WordShift, WordsToMove * APINT_WORD_SIZE);
2681   } else {
2682     for (unsigned i = 0; i != WordsToMove; ++i) {
2683       Dst[i] = Dst[i + WordShift] >> BitShift;
2684       if (i + 1 != WordsToMove)
2685         Dst[i] |= Dst[i + WordShift + 1] << (APINT_BITS_PER_WORD - BitShift);
2686     }
2687   }
2688 
2689   // Fill in the remainder with 0s.
2690   std::memset(Dst + WordsToMove, 0, WordShift * APINT_WORD_SIZE);
2691 }
2692 
2693 // Comparison (unsigned) of two bignums.
2694 int APInt::tcCompare(const WordType *lhs, const WordType *rhs,
2695                      unsigned parts) {
2696   while (parts) {
2697     parts--;
2698     if (lhs[parts] != rhs[parts])
2699       return (lhs[parts] > rhs[parts]) ? 1 : -1;
2700   }
2701 
2702   return 0;
2703 }
2704 
2705 APInt llvm::APIntOps::RoundingUDiv(const APInt &A, const APInt &B,
2706                                    APInt::Rounding RM) {
2707   // Currently udivrem always rounds down.
2708   switch (RM) {
2709   case APInt::Rounding::DOWN:
2710   case APInt::Rounding::TOWARD_ZERO:
2711     return A.udiv(B);
2712   case APInt::Rounding::UP: {
2713     APInt Quo, Rem;
2714     APInt::udivrem(A, B, Quo, Rem);
2715     if (Rem.isZero())
2716       return Quo;
2717     return Quo + 1;
2718   }
2719   }
2720   llvm_unreachable("Unknown APInt::Rounding enum");
2721 }
2722 
2723 APInt llvm::APIntOps::RoundingSDiv(const APInt &A, const APInt &B,
2724                                    APInt::Rounding RM) {
2725   switch (RM) {
2726   case APInt::Rounding::DOWN:
2727   case APInt::Rounding::UP: {
2728     APInt Quo, Rem;
2729     APInt::sdivrem(A, B, Quo, Rem);
2730     if (Rem.isZero())
2731       return Quo;
2732     // This algorithm deals with arbitrary rounding mode used by sdivrem.
2733     // We want to check whether the non-integer part of the mathematical value
2734     // is negative or not. If the non-integer part is negative, we need to round
2735     // down from Quo; otherwise, if it's positive or 0, we return Quo, as it's
2736     // already rounded down.
2737     if (RM == APInt::Rounding::DOWN) {
2738       if (Rem.isNegative() != B.isNegative())
2739         return Quo - 1;
2740       return Quo;
2741     }
2742     if (Rem.isNegative() != B.isNegative())
2743       return Quo;
2744     return Quo + 1;
2745   }
2746   // Currently sdiv rounds towards zero.
2747   case APInt::Rounding::TOWARD_ZERO:
2748     return A.sdiv(B);
2749   }
2750   llvm_unreachable("Unknown APInt::Rounding enum");
2751 }
2752 
2753 Optional<APInt>
2754 llvm::APIntOps::SolveQuadraticEquationWrap(APInt A, APInt B, APInt C,
2755                                            unsigned RangeWidth) {
2756   unsigned CoeffWidth = A.getBitWidth();
2757   assert(CoeffWidth == B.getBitWidth() && CoeffWidth == C.getBitWidth());
2758   assert(RangeWidth <= CoeffWidth &&
2759          "Value range width should be less than coefficient width");
2760   assert(RangeWidth > 1 && "Value range bit width should be > 1");
2761 
2762   LLVM_DEBUG(dbgs() << __func__ << ": solving " << A << "x^2 + " << B
2763                     << "x + " << C << ", rw:" << RangeWidth << '\n');
2764 
2765   // Identify 0 as a (non)solution immediately.
2766   if (C.sextOrTrunc(RangeWidth).isZero()) {
2767     LLVM_DEBUG(dbgs() << __func__ << ": zero solution\n");
2768     return APInt(CoeffWidth, 0);
2769   }
2770 
2771   // The result of APInt arithmetic has the same bit width as the operands,
2772   // so it can actually lose high bits. A product of two n-bit integers needs
2773   // 2n-1 bits to represent the full value.
2774   // The operation done below (on quadratic coefficients) that can produce
2775   // the largest value is the evaluation of the equation during bisection,
2776   // which needs 3 times the bitwidth of the coefficient, so the total number
2777   // of required bits is 3n.
2778   //
2779   // The purpose of this extension is to simulate the set Z of all integers,
2780   // where n+1 > n for all n in Z. In Z it makes sense to talk about positive
2781   // and negative numbers (not so much in a modulo arithmetic). The method
2782   // used to solve the equation is based on the standard formula for real
2783   // numbers, and uses the concepts of "positive" and "negative" with their
2784   // usual meanings.
2785   CoeffWidth *= 3;
2786   A = A.sext(CoeffWidth);
2787   B = B.sext(CoeffWidth);
2788   C = C.sext(CoeffWidth);
2789 
2790   // Make A > 0 for simplicity. Negate cannot overflow at this point because
2791   // the bit width has increased.
2792   if (A.isNegative()) {
2793     A.negate();
2794     B.negate();
2795     C.negate();
2796   }
2797 
2798   // Solving an equation q(x) = 0 with coefficients in modular arithmetic
2799   // is really solving a set of equations q(x) = kR for k = 0, 1, 2, ...,
2800   // and R = 2^BitWidth.
2801   // Since we're trying not only to find exact solutions, but also values
2802   // that "wrap around", such a set will always have a solution, i.e. an x
2803   // that satisfies at least one of the equations, or such that |q(x)|
2804   // exceeds kR, while |q(x-1)| for the same k does not.
2805   //
2806   // We need to find a value k, such that Ax^2 + Bx + C = kR will have a
2807   // positive solution n (in the above sense), and also such that the n
2808   // will be the least among all solutions corresponding to k = 0, 1, ...
2809   // (more precisely, the least element in the set
2810   //   { n(k) | k is such that a solution n(k) exists }).
2811   //
2812   // Consider the parabola (over real numbers) that corresponds to the
2813   // quadratic equation. Since A > 0, the arms of the parabola will point
2814   // up. Picking different values of k will shift it up and down by R.
2815   //
2816   // We want to shift the parabola in such a way as to reduce the problem
2817   // of solving q(x) = kR to solving shifted_q(x) = 0.
2818   // (The interesting solutions are the ceilings of the real number
2819   // solutions.)
2820   APInt R = APInt::getOneBitSet(CoeffWidth, RangeWidth);
2821   APInt TwoA = 2 * A;
2822   APInt SqrB = B * B;
2823   bool PickLow;
2824 
2825   auto RoundUp = [] (const APInt &V, const APInt &A) -> APInt {
2826     assert(A.isStrictlyPositive());
2827     APInt T = V.abs().urem(A);
2828     if (T.isZero())
2829       return V;
2830     return V.isNegative() ? V+T : V+(A-T);
2831   };
2832 
2833   // The vertex of the parabola is at -B/2A, but since A > 0, it's negative
2834   // iff B is positive.
2835   if (B.isNonNegative()) {
2836     // If B >= 0, the vertex it at a negative location (or at 0), so in
2837     // order to have a non-negative solution we need to pick k that makes
2838     // C-kR negative. To satisfy all the requirements for the solution
2839     // that we are looking for, it needs to be closest to 0 of all k.
2840     C = C.srem(R);
2841     if (C.isStrictlyPositive())
2842       C -= R;
2843     // Pick the greater solution.
2844     PickLow = false;
2845   } else {
2846     // If B < 0, the vertex is at a positive location. For any solution
2847     // to exist, the discriminant must be non-negative. This means that
2848     // C-kR <= B^2/4A is a necessary condition for k, i.e. there is a
2849     // lower bound on values of k: kR >= C - B^2/4A.
2850     APInt LowkR = C - SqrB.udiv(2*TwoA); // udiv because all values > 0.
2851     // Round LowkR up (towards +inf) to the nearest kR.
2852     LowkR = RoundUp(LowkR, R);
2853 
2854     // If there exists k meeting the condition above, and such that
2855     // C-kR > 0, there will be two positive real number solutions of
2856     // q(x) = kR. Out of all such values of k, pick the one that makes
2857     // C-kR closest to 0, (i.e. pick maximum k such that C-kR > 0).
2858     // In other words, find maximum k such that LowkR <= kR < C.
2859     if (C.sgt(LowkR)) {
2860       // If LowkR < C, then such a k is guaranteed to exist because
2861       // LowkR itself is a multiple of R.
2862       C -= -RoundUp(-C, R);      // C = C - RoundDown(C, R)
2863       // Pick the smaller solution.
2864       PickLow = true;
2865     } else {
2866       // If C-kR < 0 for all potential k's, it means that one solution
2867       // will be negative, while the other will be positive. The positive
2868       // solution will shift towards 0 if the parabola is moved up.
2869       // Pick the kR closest to the lower bound (i.e. make C-kR closest
2870       // to 0, or in other words, out of all parabolas that have solutions,
2871       // pick the one that is the farthest "up").
2872       // Since LowkR is itself a multiple of R, simply take C-LowkR.
2873       C -= LowkR;
2874       // Pick the greater solution.
2875       PickLow = false;
2876     }
2877   }
2878 
2879   LLVM_DEBUG(dbgs() << __func__ << ": updated coefficients " << A << "x^2 + "
2880                     << B << "x + " << C << ", rw:" << RangeWidth << '\n');
2881 
2882   APInt D = SqrB - 4*A*C;
2883   assert(D.isNonNegative() && "Negative discriminant");
2884   APInt SQ = D.sqrt();
2885 
2886   APInt Q = SQ * SQ;
2887   bool InexactSQ = Q != D;
2888   // The calculated SQ may actually be greater than the exact (non-integer)
2889   // value. If that's the case, decrement SQ to get a value that is lower.
2890   if (Q.sgt(D))
2891     SQ -= 1;
2892 
2893   APInt X;
2894   APInt Rem;
2895 
2896   // SQ is rounded down (i.e SQ * SQ <= D), so the roots may be inexact.
2897   // When using the quadratic formula directly, the calculated low root
2898   // may be greater than the exact one, since we would be subtracting SQ.
2899   // To make sure that the calculated root is not greater than the exact
2900   // one, subtract SQ+1 when calculating the low root (for inexact value
2901   // of SQ).
2902   if (PickLow)
2903     APInt::sdivrem(-B - (SQ+InexactSQ), TwoA, X, Rem);
2904   else
2905     APInt::sdivrem(-B + SQ, TwoA, X, Rem);
2906 
2907   // The updated coefficients should be such that the (exact) solution is
2908   // positive. Since APInt division rounds towards 0, the calculated one
2909   // can be 0, but cannot be negative.
2910   assert(X.isNonNegative() && "Solution should be non-negative");
2911 
2912   if (!InexactSQ && Rem.isZero()) {
2913     LLVM_DEBUG(dbgs() << __func__ << ": solution (root): " << X << '\n');
2914     return X;
2915   }
2916 
2917   assert((SQ*SQ).sle(D) && "SQ = |_sqrt(D)_|, so SQ*SQ <= D");
2918   // The exact value of the square root of D should be between SQ and SQ+1.
2919   // This implies that the solution should be between that corresponding to
2920   // SQ (i.e. X) and that corresponding to SQ+1.
2921   //
2922   // The calculated X cannot be greater than the exact (real) solution.
2923   // Actually it must be strictly less than the exact solution, while
2924   // X+1 will be greater than or equal to it.
2925 
2926   APInt VX = (A*X + B)*X + C;
2927   APInt VY = VX + TwoA*X + A + B;
2928   bool SignChange =
2929       VX.isNegative() != VY.isNegative() || VX.isZero() != VY.isZero();
2930   // If the sign did not change between X and X+1, X is not a valid solution.
2931   // This could happen when the actual (exact) roots don't have an integer
2932   // between them, so they would both be contained between X and X+1.
2933   if (!SignChange) {
2934     LLVM_DEBUG(dbgs() << __func__ << ": no valid solution\n");
2935     return None;
2936   }
2937 
2938   X += 1;
2939   LLVM_DEBUG(dbgs() << __func__ << ": solution (wrap): " << X << '\n');
2940   return X;
2941 }
2942 
2943 Optional<unsigned>
2944 llvm::APIntOps::GetMostSignificantDifferentBit(const APInt &A, const APInt &B) {
2945   assert(A.getBitWidth() == B.getBitWidth() && "Must have the same bitwidth");
2946   if (A == B)
2947     return llvm::None;
2948   return A.getBitWidth() - ((A ^ B).countLeadingZeros() + 1);
2949 }
2950 
2951 /// StoreIntToMemory - Fills the StoreBytes bytes of memory starting from Dst
2952 /// with the integer held in IntVal.
2953 void llvm::StoreIntToMemory(const APInt &IntVal, uint8_t *Dst,
2954                             unsigned StoreBytes) {
2955   assert((IntVal.getBitWidth()+7)/8 >= StoreBytes && "Integer too small!");
2956   const uint8_t *Src = (const uint8_t *)IntVal.getRawData();
2957 
2958   if (sys::IsLittleEndianHost) {
2959     // Little-endian host - the source is ordered from LSB to MSB.  Order the
2960     // destination from LSB to MSB: Do a straight copy.
2961     memcpy(Dst, Src, StoreBytes);
2962   } else {
2963     // Big-endian host - the source is an array of 64 bit words ordered from
2964     // LSW to MSW.  Each word is ordered from MSB to LSB.  Order the destination
2965     // from MSB to LSB: Reverse the word order, but not the bytes in a word.
2966     while (StoreBytes > sizeof(uint64_t)) {
2967       StoreBytes -= sizeof(uint64_t);
2968       // May not be aligned so use memcpy.
2969       memcpy(Dst + StoreBytes, Src, sizeof(uint64_t));
2970       Src += sizeof(uint64_t);
2971     }
2972 
2973     memcpy(Dst, Src + sizeof(uint64_t) - StoreBytes, StoreBytes);
2974   }
2975 }
2976 
2977 /// LoadIntFromMemory - Loads the integer stored in the LoadBytes bytes starting
2978 /// from Src into IntVal, which is assumed to be wide enough and to hold zero.
2979 void llvm::LoadIntFromMemory(APInt &IntVal, const uint8_t *Src,
2980                              unsigned LoadBytes) {
2981   assert((IntVal.getBitWidth()+7)/8 >= LoadBytes && "Integer too small!");
2982   uint8_t *Dst = reinterpret_cast<uint8_t *>(
2983                    const_cast<uint64_t *>(IntVal.getRawData()));
2984 
2985   if (sys::IsLittleEndianHost)
2986     // Little-endian host - the destination must be ordered from LSB to MSB.
2987     // The source is ordered from LSB to MSB: Do a straight copy.
2988     memcpy(Dst, Src, LoadBytes);
2989   else {
2990     // Big-endian - the destination is an array of 64 bit words ordered from
2991     // LSW to MSW.  Each word must be ordered from MSB to LSB.  The source is
2992     // ordered from MSB to LSB: Reverse the word order, but not the bytes in
2993     // a word.
2994     while (LoadBytes > sizeof(uint64_t)) {
2995       LoadBytes -= sizeof(uint64_t);
2996       // May not be aligned so use memcpy.
2997       memcpy(Dst, Src + LoadBytes, sizeof(uint64_t));
2998       Dst += sizeof(uint64_t);
2999     }
3000 
3001     memcpy(Dst + sizeof(uint64_t) - LoadBytes, Src, LoadBytes);
3002   }
3003 }
3004