1 //===-- APInt.cpp - Implement APInt class ---------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a class to represent arbitrary precision integer
10 // constant values and provide a variety of arithmetic operations on them.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/ADT/APInt.h"
15 #include "llvm/ADT/ArrayRef.h"
16 #include "llvm/ADT/FoldingSet.h"
17 #include "llvm/ADT/Hashing.h"
18 #include "llvm/ADT/Optional.h"
19 #include "llvm/ADT/SmallString.h"
20 #include "llvm/ADT/StringRef.h"
21 #include "llvm/ADT/bit.h"
22 #include "llvm/Config/llvm-config.h"
23 #include "llvm/Support/Debug.h"
24 #include "llvm/Support/ErrorHandling.h"
25 #include "llvm/Support/MathExtras.h"
26 #include "llvm/Support/raw_ostream.h"
27 #include <climits>
28 #include <cmath>
29 #include <cstdlib>
30 #include <cstring>
31 using namespace llvm;
32 
33 #define DEBUG_TYPE "apint"
34 
35 /// A utility function for allocating memory, checking for allocation failures,
36 /// and ensuring the contents are zeroed.
37 inline static uint64_t* getClearedMemory(unsigned numWords) {
38   uint64_t *result = new uint64_t[numWords];
39   memset(result, 0, numWords * sizeof(uint64_t));
40   return result;
41 }
42 
43 /// A utility function for allocating memory and checking for allocation
44 /// failure.  The content is not zeroed.
45 inline static uint64_t* getMemory(unsigned numWords) {
46   return new uint64_t[numWords];
47 }
48 
49 /// A utility function that converts a character to a digit.
50 inline static unsigned getDigit(char cdigit, uint8_t radix) {
51   unsigned r;
52 
53   if (radix == 16 || radix == 36) {
54     r = cdigit - '0';
55     if (r <= 9)
56       return r;
57 
58     r = cdigit - 'A';
59     if (r <= radix - 11U)
60       return r + 10;
61 
62     r = cdigit - 'a';
63     if (r <= radix - 11U)
64       return r + 10;
65 
66     radix = 10;
67   }
68 
69   r = cdigit - '0';
70   if (r < radix)
71     return r;
72 
73   return -1U;
74 }
75 
76 
77 void APInt::initSlowCase(uint64_t val, bool isSigned) {
78   U.pVal = getClearedMemory(getNumWords());
79   U.pVal[0] = val;
80   if (isSigned && int64_t(val) < 0)
81     for (unsigned i = 1; i < getNumWords(); ++i)
82       U.pVal[i] = WORDTYPE_MAX;
83   clearUnusedBits();
84 }
85 
86 void APInt::initSlowCase(const APInt& that) {
87   U.pVal = getMemory(getNumWords());
88   memcpy(U.pVal, that.U.pVal, getNumWords() * APINT_WORD_SIZE);
89 }
90 
91 void APInt::initFromArray(ArrayRef<uint64_t> bigVal) {
92   assert(BitWidth && "Bitwidth too small");
93   assert(bigVal.data() && "Null pointer detected!");
94   if (isSingleWord())
95     U.VAL = bigVal[0];
96   else {
97     // Get memory, cleared to 0
98     U.pVal = getClearedMemory(getNumWords());
99     // Calculate the number of words to copy
100     unsigned words = std::min<unsigned>(bigVal.size(), getNumWords());
101     // Copy the words from bigVal to pVal
102     memcpy(U.pVal, bigVal.data(), words * APINT_WORD_SIZE);
103   }
104   // Make sure unused high bits are cleared
105   clearUnusedBits();
106 }
107 
108 APInt::APInt(unsigned numBits, ArrayRef<uint64_t> bigVal)
109   : BitWidth(numBits) {
110   initFromArray(bigVal);
111 }
112 
113 APInt::APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[])
114   : BitWidth(numBits) {
115   initFromArray(makeArrayRef(bigVal, numWords));
116 }
117 
118 APInt::APInt(unsigned numbits, StringRef Str, uint8_t radix)
119   : BitWidth(numbits) {
120   assert(BitWidth && "Bitwidth too small");
121   fromString(numbits, Str, radix);
122 }
123 
124 void APInt::reallocate(unsigned NewBitWidth) {
125   // If the number of words is the same we can just change the width and stop.
126   if (getNumWords() == getNumWords(NewBitWidth)) {
127     BitWidth = NewBitWidth;
128     return;
129   }
130 
131   // If we have an allocation, delete it.
132   if (!isSingleWord())
133     delete [] U.pVal;
134 
135   // Update BitWidth.
136   BitWidth = NewBitWidth;
137 
138   // If we are supposed to have an allocation, create it.
139   if (!isSingleWord())
140     U.pVal = getMemory(getNumWords());
141 }
142 
143 void APInt::AssignSlowCase(const APInt& RHS) {
144   // Don't do anything for X = X
145   if (this == &RHS)
146     return;
147 
148   // Adjust the bit width and handle allocations as necessary.
149   reallocate(RHS.getBitWidth());
150 
151   // Copy the data.
152   if (isSingleWord())
153     U.VAL = RHS.U.VAL;
154   else
155     memcpy(U.pVal, RHS.U.pVal, getNumWords() * APINT_WORD_SIZE);
156 }
157 
158 /// This method 'profiles' an APInt for use with FoldingSet.
159 void APInt::Profile(FoldingSetNodeID& ID) const {
160   ID.AddInteger(BitWidth);
161 
162   if (isSingleWord()) {
163     ID.AddInteger(U.VAL);
164     return;
165   }
166 
167   unsigned NumWords = getNumWords();
168   for (unsigned i = 0; i < NumWords; ++i)
169     ID.AddInteger(U.pVal[i]);
170 }
171 
172 /// Prefix increment operator. Increments the APInt by one.
173 APInt& APInt::operator++() {
174   if (isSingleWord())
175     ++U.VAL;
176   else
177     tcIncrement(U.pVal, getNumWords());
178   return clearUnusedBits();
179 }
180 
181 /// Prefix decrement operator. Decrements the APInt by one.
182 APInt& APInt::operator--() {
183   if (isSingleWord())
184     --U.VAL;
185   else
186     tcDecrement(U.pVal, getNumWords());
187   return clearUnusedBits();
188 }
189 
190 /// Adds the RHS APInt to this APInt.
191 /// @returns this, after addition of RHS.
192 /// Addition assignment operator.
193 APInt& APInt::operator+=(const APInt& RHS) {
194   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
195   if (isSingleWord())
196     U.VAL += RHS.U.VAL;
197   else
198     tcAdd(U.pVal, RHS.U.pVal, 0, getNumWords());
199   return clearUnusedBits();
200 }
201 
202 APInt& APInt::operator+=(uint64_t RHS) {
203   if (isSingleWord())
204     U.VAL += RHS;
205   else
206     tcAddPart(U.pVal, RHS, getNumWords());
207   return clearUnusedBits();
208 }
209 
210 /// Subtracts the RHS APInt from this APInt
211 /// @returns this, after subtraction
212 /// Subtraction assignment operator.
213 APInt& APInt::operator-=(const APInt& RHS) {
214   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
215   if (isSingleWord())
216     U.VAL -= RHS.U.VAL;
217   else
218     tcSubtract(U.pVal, RHS.U.pVal, 0, getNumWords());
219   return clearUnusedBits();
220 }
221 
222 APInt& APInt::operator-=(uint64_t RHS) {
223   if (isSingleWord())
224     U.VAL -= RHS;
225   else
226     tcSubtractPart(U.pVal, RHS, getNumWords());
227   return clearUnusedBits();
228 }
229 
230 APInt APInt::operator*(const APInt& RHS) const {
231   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
232   if (isSingleWord())
233     return APInt(BitWidth, U.VAL * RHS.U.VAL);
234 
235   APInt Result(getMemory(getNumWords()), getBitWidth());
236 
237   tcMultiply(Result.U.pVal, U.pVal, RHS.U.pVal, getNumWords());
238 
239   Result.clearUnusedBits();
240   return Result;
241 }
242 
243 void APInt::AndAssignSlowCase(const APInt &RHS) {
244   WordType *dst = U.pVal, *rhs = RHS.U.pVal;
245   for (size_t i = 0, e = getNumWords(); i != e; ++i)
246     dst[i] &= rhs[i];
247 }
248 
249 void APInt::OrAssignSlowCase(const APInt &RHS) {
250   WordType *dst = U.pVal, *rhs = RHS.U.pVal;
251   for (size_t i = 0, e = getNumWords(); i != e; ++i)
252     dst[i] |= rhs[i];
253 }
254 
255 void APInt::XorAssignSlowCase(const APInt &RHS) {
256   WordType *dst = U.pVal, *rhs = RHS.U.pVal;
257   for (size_t i = 0, e = getNumWords(); i != e; ++i)
258     dst[i] ^= rhs[i];
259 }
260 
261 APInt& APInt::operator*=(const APInt& RHS) {
262   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
263   *this = *this * RHS;
264   return *this;
265 }
266 
267 APInt& APInt::operator*=(uint64_t RHS) {
268   if (isSingleWord()) {
269     U.VAL *= RHS;
270   } else {
271     unsigned NumWords = getNumWords();
272     tcMultiplyPart(U.pVal, U.pVal, RHS, 0, NumWords, NumWords, false);
273   }
274   return clearUnusedBits();
275 }
276 
277 bool APInt::EqualSlowCase(const APInt& RHS) const {
278   return std::equal(U.pVal, U.pVal + getNumWords(), RHS.U.pVal);
279 }
280 
281 int APInt::compare(const APInt& RHS) const {
282   assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
283   if (isSingleWord())
284     return U.VAL < RHS.U.VAL ? -1 : U.VAL > RHS.U.VAL;
285 
286   return tcCompare(U.pVal, RHS.U.pVal, getNumWords());
287 }
288 
289 int APInt::compareSigned(const APInt& RHS) const {
290   assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
291   if (isSingleWord()) {
292     int64_t lhsSext = SignExtend64(U.VAL, BitWidth);
293     int64_t rhsSext = SignExtend64(RHS.U.VAL, BitWidth);
294     return lhsSext < rhsSext ? -1 : lhsSext > rhsSext;
295   }
296 
297   bool lhsNeg = isNegative();
298   bool rhsNeg = RHS.isNegative();
299 
300   // If the sign bits don't match, then (LHS < RHS) if LHS is negative
301   if (lhsNeg != rhsNeg)
302     return lhsNeg ? -1 : 1;
303 
304   // Otherwise we can just use an unsigned comparison, because even negative
305   // numbers compare correctly this way if both have the same signed-ness.
306   return tcCompare(U.pVal, RHS.U.pVal, getNumWords());
307 }
308 
309 void APInt::setBitsSlowCase(unsigned loBit, unsigned hiBit) {
310   unsigned loWord = whichWord(loBit);
311   unsigned hiWord = whichWord(hiBit);
312 
313   // Create an initial mask for the low word with zeros below loBit.
314   uint64_t loMask = WORDTYPE_MAX << whichBit(loBit);
315 
316   // If hiBit is not aligned, we need a high mask.
317   unsigned hiShiftAmt = whichBit(hiBit);
318   if (hiShiftAmt != 0) {
319     // Create a high mask with zeros above hiBit.
320     uint64_t hiMask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - hiShiftAmt);
321     // If loWord and hiWord are equal, then we combine the masks. Otherwise,
322     // set the bits in hiWord.
323     if (hiWord == loWord)
324       loMask &= hiMask;
325     else
326       U.pVal[hiWord] |= hiMask;
327   }
328   // Apply the mask to the low word.
329   U.pVal[loWord] |= loMask;
330 
331   // Fill any words between loWord and hiWord with all ones.
332   for (unsigned word = loWord + 1; word < hiWord; ++word)
333     U.pVal[word] = WORDTYPE_MAX;
334 }
335 
336 // Complement a bignum in-place.
337 static void tcComplement(APInt::WordType *dst, unsigned parts) {
338   for (unsigned i = 0; i < parts; i++)
339     dst[i] = ~dst[i];
340 }
341 
342 /// Toggle every bit to its opposite value.
343 void APInt::flipAllBitsSlowCase() {
344   tcComplement(U.pVal, getNumWords());
345   clearUnusedBits();
346 }
347 
348 /// Toggle a given bit to its opposite value whose position is given
349 /// as "bitPosition".
350 /// Toggles a given bit to its opposite value.
351 void APInt::flipBit(unsigned bitPosition) {
352   assert(bitPosition < BitWidth && "Out of the bit-width range!");
353   setBitVal(bitPosition, !(*this)[bitPosition]);
354 }
355 
356 void APInt::insertBits(const APInt &subBits, unsigned bitPosition) {
357   unsigned subBitWidth = subBits.getBitWidth();
358   assert(0 < subBitWidth && (subBitWidth + bitPosition) <= BitWidth &&
359          "Illegal bit insertion");
360 
361   // Insertion is a direct copy.
362   if (subBitWidth == BitWidth) {
363     *this = subBits;
364     return;
365   }
366 
367   // Single word result can be done as a direct bitmask.
368   if (isSingleWord()) {
369     uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - subBitWidth);
370     U.VAL &= ~(mask << bitPosition);
371     U.VAL |= (subBits.U.VAL << bitPosition);
372     return;
373   }
374 
375   unsigned loBit = whichBit(bitPosition);
376   unsigned loWord = whichWord(bitPosition);
377   unsigned hi1Word = whichWord(bitPosition + subBitWidth - 1);
378 
379   // Insertion within a single word can be done as a direct bitmask.
380   if (loWord == hi1Word) {
381     uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - subBitWidth);
382     U.pVal[loWord] &= ~(mask << loBit);
383     U.pVal[loWord] |= (subBits.U.VAL << loBit);
384     return;
385   }
386 
387   // Insert on word boundaries.
388   if (loBit == 0) {
389     // Direct copy whole words.
390     unsigned numWholeSubWords = subBitWidth / APINT_BITS_PER_WORD;
391     memcpy(U.pVal + loWord, subBits.getRawData(),
392            numWholeSubWords * APINT_WORD_SIZE);
393 
394     // Mask+insert remaining bits.
395     unsigned remainingBits = subBitWidth % APINT_BITS_PER_WORD;
396     if (remainingBits != 0) {
397       uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - remainingBits);
398       U.pVal[hi1Word] &= ~mask;
399       U.pVal[hi1Word] |= subBits.getWord(subBitWidth - 1);
400     }
401     return;
402   }
403 
404   // General case - set/clear individual bits in dst based on src.
405   // TODO - there is scope for optimization here, but at the moment this code
406   // path is barely used so prefer readability over performance.
407   for (unsigned i = 0; i != subBitWidth; ++i)
408     setBitVal(bitPosition + i, subBits[i]);
409 }
410 
411 void APInt::insertBits(uint64_t subBits, unsigned bitPosition, unsigned numBits) {
412   uint64_t maskBits = maskTrailingOnes<uint64_t>(numBits);
413   subBits &= maskBits;
414   if (isSingleWord()) {
415     U.VAL &= ~(maskBits << bitPosition);
416     U.VAL |= subBits << bitPosition;
417     return;
418   }
419 
420   unsigned loBit = whichBit(bitPosition);
421   unsigned loWord = whichWord(bitPosition);
422   unsigned hiWord = whichWord(bitPosition + numBits - 1);
423   if (loWord == hiWord) {
424     U.pVal[loWord] &= ~(maskBits << loBit);
425     U.pVal[loWord] |= subBits << loBit;
426     return;
427   }
428 
429   static_assert(8 * sizeof(WordType) <= 64, "This code assumes only two words affected");
430   unsigned wordBits = 8 * sizeof(WordType);
431   U.pVal[loWord] &= ~(maskBits << loBit);
432   U.pVal[loWord] |= subBits << loBit;
433 
434   U.pVal[hiWord] &= ~(maskBits >> (wordBits - loBit));
435   U.pVal[hiWord] |= subBits >> (wordBits - loBit);
436 }
437 
438 APInt APInt::extractBits(unsigned numBits, unsigned bitPosition) const {
439   assert(numBits > 0 && "Can't extract zero bits");
440   assert(bitPosition < BitWidth && (numBits + bitPosition) <= BitWidth &&
441          "Illegal bit extraction");
442 
443   if (isSingleWord())
444     return APInt(numBits, U.VAL >> bitPosition);
445 
446   unsigned loBit = whichBit(bitPosition);
447   unsigned loWord = whichWord(bitPosition);
448   unsigned hiWord = whichWord(bitPosition + numBits - 1);
449 
450   // Single word result extracting bits from a single word source.
451   if (loWord == hiWord)
452     return APInt(numBits, U.pVal[loWord] >> loBit);
453 
454   // Extracting bits that start on a source word boundary can be done
455   // as a fast memory copy.
456   if (loBit == 0)
457     return APInt(numBits, makeArrayRef(U.pVal + loWord, 1 + hiWord - loWord));
458 
459   // General case - shift + copy source words directly into place.
460   APInt Result(numBits, 0);
461   unsigned NumSrcWords = getNumWords();
462   unsigned NumDstWords = Result.getNumWords();
463 
464   uint64_t *DestPtr = Result.isSingleWord() ? &Result.U.VAL : Result.U.pVal;
465   for (unsigned word = 0; word < NumDstWords; ++word) {
466     uint64_t w0 = U.pVal[loWord + word];
467     uint64_t w1 =
468         (loWord + word + 1) < NumSrcWords ? U.pVal[loWord + word + 1] : 0;
469     DestPtr[word] = (w0 >> loBit) | (w1 << (APINT_BITS_PER_WORD - loBit));
470   }
471 
472   return Result.clearUnusedBits();
473 }
474 
475 uint64_t APInt::extractBitsAsZExtValue(unsigned numBits,
476                                        unsigned bitPosition) const {
477   assert(numBits > 0 && "Can't extract zero bits");
478   assert(bitPosition < BitWidth && (numBits + bitPosition) <= BitWidth &&
479          "Illegal bit extraction");
480   assert(numBits <= 64 && "Illegal bit extraction");
481 
482   uint64_t maskBits = maskTrailingOnes<uint64_t>(numBits);
483   if (isSingleWord())
484     return (U.VAL >> bitPosition) & maskBits;
485 
486   unsigned loBit = whichBit(bitPosition);
487   unsigned loWord = whichWord(bitPosition);
488   unsigned hiWord = whichWord(bitPosition + numBits - 1);
489   if (loWord == hiWord)
490     return (U.pVal[loWord] >> loBit) & maskBits;
491 
492   static_assert(8 * sizeof(WordType) <= 64, "This code assumes only two words affected");
493   unsigned wordBits = 8 * sizeof(WordType);
494   uint64_t retBits = U.pVal[loWord] >> loBit;
495   retBits |= U.pVal[hiWord] << (wordBits - loBit);
496   retBits &= maskBits;
497   return retBits;
498 }
499 
500 unsigned APInt::getBitsNeeded(StringRef str, uint8_t radix) {
501   assert(!str.empty() && "Invalid string length");
502   assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 ||
503           radix == 36) &&
504          "Radix should be 2, 8, 10, 16, or 36!");
505 
506   size_t slen = str.size();
507 
508   // Each computation below needs to know if it's negative.
509   StringRef::iterator p = str.begin();
510   unsigned isNegative = *p == '-';
511   if (*p == '-' || *p == '+') {
512     p++;
513     slen--;
514     assert(slen && "String is only a sign, needs a value.");
515   }
516 
517   // For radixes of power-of-two values, the bits required is accurately and
518   // easily computed
519   if (radix == 2)
520     return slen + isNegative;
521   if (radix == 8)
522     return slen * 3 + isNegative;
523   if (radix == 16)
524     return slen * 4 + isNegative;
525 
526   // FIXME: base 36
527 
528   // This is grossly inefficient but accurate. We could probably do something
529   // with a computation of roughly slen*64/20 and then adjust by the value of
530   // the first few digits. But, I'm not sure how accurate that could be.
531 
532   // Compute a sufficient number of bits that is always large enough but might
533   // be too large. This avoids the assertion in the constructor. This
534   // calculation doesn't work appropriately for the numbers 0-9, so just use 4
535   // bits in that case.
536   unsigned sufficient
537     = radix == 10? (slen == 1 ? 4 : slen * 64/18)
538                  : (slen == 1 ? 7 : slen * 16/3);
539 
540   // Convert to the actual binary value.
541   APInt tmp(sufficient, StringRef(p, slen), radix);
542 
543   // Compute how many bits are required. If the log is infinite, assume we need
544   // just bit. If the log is exact and value is negative, then the value is
545   // MinSignedValue with (log + 1) bits.
546   unsigned log = tmp.logBase2();
547   if (log == (unsigned)-1) {
548     return isNegative + 1;
549   } else if (isNegative && tmp.isPowerOf2()) {
550     return isNegative + log;
551   } else {
552     return isNegative + log + 1;
553   }
554 }
555 
556 hash_code llvm::hash_value(const APInt &Arg) {
557   if (Arg.isSingleWord())
558     return hash_combine(Arg.BitWidth, Arg.U.VAL);
559 
560   return hash_combine(
561       Arg.BitWidth,
562       hash_combine_range(Arg.U.pVal, Arg.U.pVal + Arg.getNumWords()));
563 }
564 
565 unsigned DenseMapInfo<APInt>::getHashValue(const APInt &Key) {
566   return static_cast<unsigned>(hash_value(Key));
567 }
568 
569 bool APInt::isSplat(unsigned SplatSizeInBits) const {
570   assert(getBitWidth() % SplatSizeInBits == 0 &&
571          "SplatSizeInBits must divide width!");
572   // We can check that all parts of an integer are equal by making use of a
573   // little trick: rotate and check if it's still the same value.
574   return *this == rotl(SplatSizeInBits);
575 }
576 
577 /// This function returns the high "numBits" bits of this APInt.
578 APInt APInt::getHiBits(unsigned numBits) const {
579   return this->lshr(BitWidth - numBits);
580 }
581 
582 /// This function returns the low "numBits" bits of this APInt.
583 APInt APInt::getLoBits(unsigned numBits) const {
584   APInt Result(getLowBitsSet(BitWidth, numBits));
585   Result &= *this;
586   return Result;
587 }
588 
589 /// Return a value containing V broadcasted over NewLen bits.
590 APInt APInt::getSplat(unsigned NewLen, const APInt &V) {
591   assert(NewLen >= V.getBitWidth() && "Can't splat to smaller bit width!");
592 
593   APInt Val = V.zextOrSelf(NewLen);
594   for (unsigned I = V.getBitWidth(); I < NewLen; I <<= 1)
595     Val |= Val << I;
596 
597   return Val;
598 }
599 
600 unsigned APInt::countLeadingZerosSlowCase() const {
601   unsigned Count = 0;
602   for (int i = getNumWords()-1; i >= 0; --i) {
603     uint64_t V = U.pVal[i];
604     if (V == 0)
605       Count += APINT_BITS_PER_WORD;
606     else {
607       Count += llvm::countLeadingZeros(V);
608       break;
609     }
610   }
611   // Adjust for unused bits in the most significant word (they are zero).
612   unsigned Mod = BitWidth % APINT_BITS_PER_WORD;
613   Count -= Mod > 0 ? APINT_BITS_PER_WORD - Mod : 0;
614   return Count;
615 }
616 
617 unsigned APInt::countLeadingOnesSlowCase() const {
618   unsigned highWordBits = BitWidth % APINT_BITS_PER_WORD;
619   unsigned shift;
620   if (!highWordBits) {
621     highWordBits = APINT_BITS_PER_WORD;
622     shift = 0;
623   } else {
624     shift = APINT_BITS_PER_WORD - highWordBits;
625   }
626   int i = getNumWords() - 1;
627   unsigned Count = llvm::countLeadingOnes(U.pVal[i] << shift);
628   if (Count == highWordBits) {
629     for (i--; i >= 0; --i) {
630       if (U.pVal[i] == WORDTYPE_MAX)
631         Count += APINT_BITS_PER_WORD;
632       else {
633         Count += llvm::countLeadingOnes(U.pVal[i]);
634         break;
635       }
636     }
637   }
638   return Count;
639 }
640 
641 unsigned APInt::countTrailingZerosSlowCase() const {
642   unsigned Count = 0;
643   unsigned i = 0;
644   for (; i < getNumWords() && U.pVal[i] == 0; ++i)
645     Count += APINT_BITS_PER_WORD;
646   if (i < getNumWords())
647     Count += llvm::countTrailingZeros(U.pVal[i]);
648   return std::min(Count, BitWidth);
649 }
650 
651 unsigned APInt::countTrailingOnesSlowCase() const {
652   unsigned Count = 0;
653   unsigned i = 0;
654   for (; i < getNumWords() && U.pVal[i] == WORDTYPE_MAX; ++i)
655     Count += APINT_BITS_PER_WORD;
656   if (i < getNumWords())
657     Count += llvm::countTrailingOnes(U.pVal[i]);
658   assert(Count <= BitWidth);
659   return Count;
660 }
661 
662 unsigned APInt::countPopulationSlowCase() const {
663   unsigned Count = 0;
664   for (unsigned i = 0; i < getNumWords(); ++i)
665     Count += llvm::countPopulation(U.pVal[i]);
666   return Count;
667 }
668 
669 bool APInt::intersectsSlowCase(const APInt &RHS) const {
670   for (unsigned i = 0, e = getNumWords(); i != e; ++i)
671     if ((U.pVal[i] & RHS.U.pVal[i]) != 0)
672       return true;
673 
674   return false;
675 }
676 
677 bool APInt::isSubsetOfSlowCase(const APInt &RHS) const {
678   for (unsigned i = 0, e = getNumWords(); i != e; ++i)
679     if ((U.pVal[i] & ~RHS.U.pVal[i]) != 0)
680       return false;
681 
682   return true;
683 }
684 
685 APInt APInt::byteSwap() const {
686   assert(BitWidth >= 16 && BitWidth % 8 == 0 && "Cannot byteswap!");
687   if (BitWidth == 16)
688     return APInt(BitWidth, ByteSwap_16(uint16_t(U.VAL)));
689   if (BitWidth == 32)
690     return APInt(BitWidth, ByteSwap_32(unsigned(U.VAL)));
691   if (BitWidth <= 64) {
692     uint64_t Tmp1 = ByteSwap_64(U.VAL);
693     Tmp1 >>= (64 - BitWidth);
694     return APInt(BitWidth, Tmp1);
695   }
696 
697   APInt Result(getNumWords() * APINT_BITS_PER_WORD, 0);
698   for (unsigned I = 0, N = getNumWords(); I != N; ++I)
699     Result.U.pVal[I] = ByteSwap_64(U.pVal[N - I - 1]);
700   if (Result.BitWidth != BitWidth) {
701     Result.lshrInPlace(Result.BitWidth - BitWidth);
702     Result.BitWidth = BitWidth;
703   }
704   return Result;
705 }
706 
707 APInt APInt::reverseBits() const {
708   switch (BitWidth) {
709   case 64:
710     return APInt(BitWidth, llvm::reverseBits<uint64_t>(U.VAL));
711   case 32:
712     return APInt(BitWidth, llvm::reverseBits<uint32_t>(U.VAL));
713   case 16:
714     return APInt(BitWidth, llvm::reverseBits<uint16_t>(U.VAL));
715   case 8:
716     return APInt(BitWidth, llvm::reverseBits<uint8_t>(U.VAL));
717   default:
718     break;
719   }
720 
721   APInt Val(*this);
722   APInt Reversed(BitWidth, 0);
723   unsigned S = BitWidth;
724 
725   for (; Val != 0; Val.lshrInPlace(1)) {
726     Reversed <<= 1;
727     Reversed |= Val[0];
728     --S;
729   }
730 
731   Reversed <<= S;
732   return Reversed;
733 }
734 
735 APInt llvm::APIntOps::GreatestCommonDivisor(APInt A, APInt B) {
736   // Fast-path a common case.
737   if (A == B) return A;
738 
739   // Corner cases: if either operand is zero, the other is the gcd.
740   if (!A) return B;
741   if (!B) return A;
742 
743   // Count common powers of 2 and remove all other powers of 2.
744   unsigned Pow2;
745   {
746     unsigned Pow2_A = A.countTrailingZeros();
747     unsigned Pow2_B = B.countTrailingZeros();
748     if (Pow2_A > Pow2_B) {
749       A.lshrInPlace(Pow2_A - Pow2_B);
750       Pow2 = Pow2_B;
751     } else if (Pow2_B > Pow2_A) {
752       B.lshrInPlace(Pow2_B - Pow2_A);
753       Pow2 = Pow2_A;
754     } else {
755       Pow2 = Pow2_A;
756     }
757   }
758 
759   // Both operands are odd multiples of 2^Pow_2:
760   //
761   //   gcd(a, b) = gcd(|a - b| / 2^i, min(a, b))
762   //
763   // This is a modified version of Stein's algorithm, taking advantage of
764   // efficient countTrailingZeros().
765   while (A != B) {
766     if (A.ugt(B)) {
767       A -= B;
768       A.lshrInPlace(A.countTrailingZeros() - Pow2);
769     } else {
770       B -= A;
771       B.lshrInPlace(B.countTrailingZeros() - Pow2);
772     }
773   }
774 
775   return A;
776 }
777 
778 APInt llvm::APIntOps::RoundDoubleToAPInt(double Double, unsigned width) {
779   uint64_t I = bit_cast<uint64_t>(Double);
780 
781   // Get the sign bit from the highest order bit
782   bool isNeg = I >> 63;
783 
784   // Get the 11-bit exponent and adjust for the 1023 bit bias
785   int64_t exp = ((I >> 52) & 0x7ff) - 1023;
786 
787   // If the exponent is negative, the value is < 0 so just return 0.
788   if (exp < 0)
789     return APInt(width, 0u);
790 
791   // Extract the mantissa by clearing the top 12 bits (sign + exponent).
792   uint64_t mantissa = (I & (~0ULL >> 12)) | 1ULL << 52;
793 
794   // If the exponent doesn't shift all bits out of the mantissa
795   if (exp < 52)
796     return isNeg ? -APInt(width, mantissa >> (52 - exp)) :
797                     APInt(width, mantissa >> (52 - exp));
798 
799   // If the client didn't provide enough bits for us to shift the mantissa into
800   // then the result is undefined, just return 0
801   if (width <= exp - 52)
802     return APInt(width, 0);
803 
804   // Otherwise, we have to shift the mantissa bits up to the right location
805   APInt Tmp(width, mantissa);
806   Tmp <<= (unsigned)exp - 52;
807   return isNeg ? -Tmp : Tmp;
808 }
809 
810 /// This function converts this APInt to a double.
811 /// The layout for double is as following (IEEE Standard 754):
812 ///  --------------------------------------
813 /// |  Sign    Exponent    Fraction    Bias |
814 /// |-------------------------------------- |
815 /// |  1[63]   11[62-52]   52[51-00]   1023 |
816 ///  --------------------------------------
817 double APInt::roundToDouble(bool isSigned) const {
818 
819   // Handle the simple case where the value is contained in one uint64_t.
820   // It is wrong to optimize getWord(0) to VAL; there might be more than one word.
821   if (isSingleWord() || getActiveBits() <= APINT_BITS_PER_WORD) {
822     if (isSigned) {
823       int64_t sext = SignExtend64(getWord(0), BitWidth);
824       return double(sext);
825     } else
826       return double(getWord(0));
827   }
828 
829   // Determine if the value is negative.
830   bool isNeg = isSigned ? (*this)[BitWidth-1] : false;
831 
832   // Construct the absolute value if we're negative.
833   APInt Tmp(isNeg ? -(*this) : (*this));
834 
835   // Figure out how many bits we're using.
836   unsigned n = Tmp.getActiveBits();
837 
838   // The exponent (without bias normalization) is just the number of bits
839   // we are using. Note that the sign bit is gone since we constructed the
840   // absolute value.
841   uint64_t exp = n;
842 
843   // Return infinity for exponent overflow
844   if (exp > 1023) {
845     if (!isSigned || !isNeg)
846       return std::numeric_limits<double>::infinity();
847     else
848       return -std::numeric_limits<double>::infinity();
849   }
850   exp += 1023; // Increment for 1023 bias
851 
852   // Number of bits in mantissa is 52. To obtain the mantissa value, we must
853   // extract the high 52 bits from the correct words in pVal.
854   uint64_t mantissa;
855   unsigned hiWord = whichWord(n-1);
856   if (hiWord == 0) {
857     mantissa = Tmp.U.pVal[0];
858     if (n > 52)
859       mantissa >>= n - 52; // shift down, we want the top 52 bits.
860   } else {
861     assert(hiWord > 0 && "huh?");
862     uint64_t hibits = Tmp.U.pVal[hiWord] << (52 - n % APINT_BITS_PER_WORD);
863     uint64_t lobits = Tmp.U.pVal[hiWord-1] >> (11 + n % APINT_BITS_PER_WORD);
864     mantissa = hibits | lobits;
865   }
866 
867   // The leading bit of mantissa is implicit, so get rid of it.
868   uint64_t sign = isNeg ? (1ULL << (APINT_BITS_PER_WORD - 1)) : 0;
869   uint64_t I = sign | (exp << 52) | mantissa;
870   return bit_cast<double>(I);
871 }
872 
873 // Truncate to new width.
874 APInt APInt::trunc(unsigned width) const {
875   assert(width < BitWidth && "Invalid APInt Truncate request");
876   assert(width && "Can't truncate to 0 bits");
877 
878   if (width <= APINT_BITS_PER_WORD)
879     return APInt(width, getRawData()[0]);
880 
881   APInt Result(getMemory(getNumWords(width)), width);
882 
883   // Copy full words.
884   unsigned i;
885   for (i = 0; i != width / APINT_BITS_PER_WORD; i++)
886     Result.U.pVal[i] = U.pVal[i];
887 
888   // Truncate and copy any partial word.
889   unsigned bits = (0 - width) % APINT_BITS_PER_WORD;
890   if (bits != 0)
891     Result.U.pVal[i] = U.pVal[i] << bits >> bits;
892 
893   return Result;
894 }
895 
896 // Truncate to new width with unsigned saturation.
897 APInt APInt::truncUSat(unsigned width) const {
898   assert(width < BitWidth && "Invalid APInt Truncate request");
899   assert(width && "Can't truncate to 0 bits");
900 
901   // Can we just losslessly truncate it?
902   if (isIntN(width))
903     return trunc(width);
904   // If not, then just return the new limit.
905   return APInt::getMaxValue(width);
906 }
907 
908 // Truncate to new width with signed saturation.
909 APInt APInt::truncSSat(unsigned width) const {
910   assert(width < BitWidth && "Invalid APInt Truncate request");
911   assert(width && "Can't truncate to 0 bits");
912 
913   // Can we just losslessly truncate it?
914   if (isSignedIntN(width))
915     return trunc(width);
916   // If not, then just return the new limits.
917   return isNegative() ? APInt::getSignedMinValue(width)
918                       : APInt::getSignedMaxValue(width);
919 }
920 
921 // Sign extend to a new width.
922 APInt APInt::sext(unsigned Width) const {
923   assert(Width > BitWidth && "Invalid APInt SignExtend request");
924 
925   if (Width <= APINT_BITS_PER_WORD)
926     return APInt(Width, SignExtend64(U.VAL, BitWidth));
927 
928   APInt Result(getMemory(getNumWords(Width)), Width);
929 
930   // Copy words.
931   std::memcpy(Result.U.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE);
932 
933   // Sign extend the last word since there may be unused bits in the input.
934   Result.U.pVal[getNumWords() - 1] =
935       SignExtend64(Result.U.pVal[getNumWords() - 1],
936                    ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1);
937 
938   // Fill with sign bits.
939   std::memset(Result.U.pVal + getNumWords(), isNegative() ? -1 : 0,
940               (Result.getNumWords() - getNumWords()) * APINT_WORD_SIZE);
941   Result.clearUnusedBits();
942   return Result;
943 }
944 
945 //  Zero extend to a new width.
946 APInt APInt::zext(unsigned width) const {
947   assert(width > BitWidth && "Invalid APInt ZeroExtend request");
948 
949   if (width <= APINT_BITS_PER_WORD)
950     return APInt(width, U.VAL);
951 
952   APInt Result(getMemory(getNumWords(width)), width);
953 
954   // Copy words.
955   std::memcpy(Result.U.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE);
956 
957   // Zero remaining words.
958   std::memset(Result.U.pVal + getNumWords(), 0,
959               (Result.getNumWords() - getNumWords()) * APINT_WORD_SIZE);
960 
961   return Result;
962 }
963 
964 APInt APInt::zextOrTrunc(unsigned width) const {
965   if (BitWidth < width)
966     return zext(width);
967   if (BitWidth > width)
968     return trunc(width);
969   return *this;
970 }
971 
972 APInt APInt::sextOrTrunc(unsigned width) const {
973   if (BitWidth < width)
974     return sext(width);
975   if (BitWidth > width)
976     return trunc(width);
977   return *this;
978 }
979 
980 APInt APInt::truncOrSelf(unsigned width) const {
981   if (BitWidth > width)
982     return trunc(width);
983   return *this;
984 }
985 
986 APInt APInt::zextOrSelf(unsigned width) const {
987   if (BitWidth < width)
988     return zext(width);
989   return *this;
990 }
991 
992 APInt APInt::sextOrSelf(unsigned width) const {
993   if (BitWidth < width)
994     return sext(width);
995   return *this;
996 }
997 
998 /// Arithmetic right-shift this APInt by shiftAmt.
999 /// Arithmetic right-shift function.
1000 void APInt::ashrInPlace(const APInt &shiftAmt) {
1001   ashrInPlace((unsigned)shiftAmt.getLimitedValue(BitWidth));
1002 }
1003 
1004 /// Arithmetic right-shift this APInt by shiftAmt.
1005 /// Arithmetic right-shift function.
1006 void APInt::ashrSlowCase(unsigned ShiftAmt) {
1007   // Don't bother performing a no-op shift.
1008   if (!ShiftAmt)
1009     return;
1010 
1011   // Save the original sign bit for later.
1012   bool Negative = isNegative();
1013 
1014   // WordShift is the inter-part shift; BitShift is intra-part shift.
1015   unsigned WordShift = ShiftAmt / APINT_BITS_PER_WORD;
1016   unsigned BitShift = ShiftAmt % APINT_BITS_PER_WORD;
1017 
1018   unsigned WordsToMove = getNumWords() - WordShift;
1019   if (WordsToMove != 0) {
1020     // Sign extend the last word to fill in the unused bits.
1021     U.pVal[getNumWords() - 1] = SignExtend64(
1022         U.pVal[getNumWords() - 1], ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1);
1023 
1024     // Fastpath for moving by whole words.
1025     if (BitShift == 0) {
1026       std::memmove(U.pVal, U.pVal + WordShift, WordsToMove * APINT_WORD_SIZE);
1027     } else {
1028       // Move the words containing significant bits.
1029       for (unsigned i = 0; i != WordsToMove - 1; ++i)
1030         U.pVal[i] = (U.pVal[i + WordShift] >> BitShift) |
1031                     (U.pVal[i + WordShift + 1] << (APINT_BITS_PER_WORD - BitShift));
1032 
1033       // Handle the last word which has no high bits to copy.
1034       U.pVal[WordsToMove - 1] = U.pVal[WordShift + WordsToMove - 1] >> BitShift;
1035       // Sign extend one more time.
1036       U.pVal[WordsToMove - 1] =
1037           SignExtend64(U.pVal[WordsToMove - 1], APINT_BITS_PER_WORD - BitShift);
1038     }
1039   }
1040 
1041   // Fill in the remainder based on the original sign.
1042   std::memset(U.pVal + WordsToMove, Negative ? -1 : 0,
1043               WordShift * APINT_WORD_SIZE);
1044   clearUnusedBits();
1045 }
1046 
1047 /// Logical right-shift this APInt by shiftAmt.
1048 /// Logical right-shift function.
1049 void APInt::lshrInPlace(const APInt &shiftAmt) {
1050   lshrInPlace((unsigned)shiftAmt.getLimitedValue(BitWidth));
1051 }
1052 
1053 /// Logical right-shift this APInt by shiftAmt.
1054 /// Logical right-shift function.
1055 void APInt::lshrSlowCase(unsigned ShiftAmt) {
1056   tcShiftRight(U.pVal, getNumWords(), ShiftAmt);
1057 }
1058 
1059 /// Left-shift this APInt by shiftAmt.
1060 /// Left-shift function.
1061 APInt &APInt::operator<<=(const APInt &shiftAmt) {
1062   // It's undefined behavior in C to shift by BitWidth or greater.
1063   *this <<= (unsigned)shiftAmt.getLimitedValue(BitWidth);
1064   return *this;
1065 }
1066 
1067 void APInt::shlSlowCase(unsigned ShiftAmt) {
1068   tcShiftLeft(U.pVal, getNumWords(), ShiftAmt);
1069   clearUnusedBits();
1070 }
1071 
1072 // Calculate the rotate amount modulo the bit width.
1073 static unsigned rotateModulo(unsigned BitWidth, const APInt &rotateAmt) {
1074   unsigned rotBitWidth = rotateAmt.getBitWidth();
1075   APInt rot = rotateAmt;
1076   if (rotBitWidth < BitWidth) {
1077     // Extend the rotate APInt, so that the urem doesn't divide by 0.
1078     // e.g. APInt(1, 32) would give APInt(1, 0).
1079     rot = rotateAmt.zext(BitWidth);
1080   }
1081   rot = rot.urem(APInt(rot.getBitWidth(), BitWidth));
1082   return rot.getLimitedValue(BitWidth);
1083 }
1084 
1085 APInt APInt::rotl(const APInt &rotateAmt) const {
1086   return rotl(rotateModulo(BitWidth, rotateAmt));
1087 }
1088 
1089 APInt APInt::rotl(unsigned rotateAmt) const {
1090   rotateAmt %= BitWidth;
1091   if (rotateAmt == 0)
1092     return *this;
1093   return shl(rotateAmt) | lshr(BitWidth - rotateAmt);
1094 }
1095 
1096 APInt APInt::rotr(const APInt &rotateAmt) const {
1097   return rotr(rotateModulo(BitWidth, rotateAmt));
1098 }
1099 
1100 APInt APInt::rotr(unsigned rotateAmt) const {
1101   rotateAmt %= BitWidth;
1102   if (rotateAmt == 0)
1103     return *this;
1104   return lshr(rotateAmt) | shl(BitWidth - rotateAmt);
1105 }
1106 
1107 /// \returns the nearest log base 2 of this APInt. Ties round up.
1108 ///
1109 /// NOTE: When we have a BitWidth of 1, we define:
1110 ///
1111 ///   log2(0) = UINT32_MAX
1112 ///   log2(1) = 0
1113 ///
1114 /// to get around any mathematical concerns resulting from
1115 /// referencing 2 in a space where 2 does no exist.
1116 unsigned APInt::nearestLogBase2() const {
1117   // Special case when we have a bitwidth of 1. If VAL is 1, then we
1118   // get 0. If VAL is 0, we get WORDTYPE_MAX which gets truncated to
1119   // UINT32_MAX.
1120   if (BitWidth == 1)
1121     return U.VAL - 1;
1122 
1123   // Handle the zero case.
1124   if (isNullValue())
1125     return UINT32_MAX;
1126 
1127   // The non-zero case is handled by computing:
1128   //
1129   //   nearestLogBase2(x) = logBase2(x) + x[logBase2(x)-1].
1130   //
1131   // where x[i] is referring to the value of the ith bit of x.
1132   unsigned lg = logBase2();
1133   return lg + unsigned((*this)[lg - 1]);
1134 }
1135 
1136 // Square Root - this method computes and returns the square root of "this".
1137 // Three mechanisms are used for computation. For small values (<= 5 bits),
1138 // a table lookup is done. This gets some performance for common cases. For
1139 // values using less than 52 bits, the value is converted to double and then
1140 // the libc sqrt function is called. The result is rounded and then converted
1141 // back to a uint64_t which is then used to construct the result. Finally,
1142 // the Babylonian method for computing square roots is used.
1143 APInt APInt::sqrt() const {
1144 
1145   // Determine the magnitude of the value.
1146   unsigned magnitude = getActiveBits();
1147 
1148   // Use a fast table for some small values. This also gets rid of some
1149   // rounding errors in libc sqrt for small values.
1150   if (magnitude <= 5) {
1151     static const uint8_t results[32] = {
1152       /*     0 */ 0,
1153       /*  1- 2 */ 1, 1,
1154       /*  3- 6 */ 2, 2, 2, 2,
1155       /*  7-12 */ 3, 3, 3, 3, 3, 3,
1156       /* 13-20 */ 4, 4, 4, 4, 4, 4, 4, 4,
1157       /* 21-30 */ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1158       /*    31 */ 6
1159     };
1160     return APInt(BitWidth, results[ (isSingleWord() ? U.VAL : U.pVal[0]) ]);
1161   }
1162 
1163   // If the magnitude of the value fits in less than 52 bits (the precision of
1164   // an IEEE double precision floating point value), then we can use the
1165   // libc sqrt function which will probably use a hardware sqrt computation.
1166   // This should be faster than the algorithm below.
1167   if (magnitude < 52) {
1168     return APInt(BitWidth,
1169                  uint64_t(::round(::sqrt(double(isSingleWord() ? U.VAL
1170                                                                : U.pVal[0])))));
1171   }
1172 
1173   // Okay, all the short cuts are exhausted. We must compute it. The following
1174   // is a classical Babylonian method for computing the square root. This code
1175   // was adapted to APInt from a wikipedia article on such computations.
1176   // See http://www.wikipedia.org/ and go to the page named
1177   // Calculate_an_integer_square_root.
1178   unsigned nbits = BitWidth, i = 4;
1179   APInt testy(BitWidth, 16);
1180   APInt x_old(BitWidth, 1);
1181   APInt x_new(BitWidth, 0);
1182   APInt two(BitWidth, 2);
1183 
1184   // Select a good starting value using binary logarithms.
1185   for (;; i += 2, testy = testy.shl(2))
1186     if (i >= nbits || this->ule(testy)) {
1187       x_old = x_old.shl(i / 2);
1188       break;
1189     }
1190 
1191   // Use the Babylonian method to arrive at the integer square root:
1192   for (;;) {
1193     x_new = (this->udiv(x_old) + x_old).udiv(two);
1194     if (x_old.ule(x_new))
1195       break;
1196     x_old = x_new;
1197   }
1198 
1199   // Make sure we return the closest approximation
1200   // NOTE: The rounding calculation below is correct. It will produce an
1201   // off-by-one discrepancy with results from pari/gp. That discrepancy has been
1202   // determined to be a rounding issue with pari/gp as it begins to use a
1203   // floating point representation after 192 bits. There are no discrepancies
1204   // between this algorithm and pari/gp for bit widths < 192 bits.
1205   APInt square(x_old * x_old);
1206   APInt nextSquare((x_old + 1) * (x_old +1));
1207   if (this->ult(square))
1208     return x_old;
1209   assert(this->ule(nextSquare) && "Error in APInt::sqrt computation");
1210   APInt midpoint((nextSquare - square).udiv(two));
1211   APInt offset(*this - square);
1212   if (offset.ult(midpoint))
1213     return x_old;
1214   return x_old + 1;
1215 }
1216 
1217 /// Computes the multiplicative inverse of this APInt for a given modulo. The
1218 /// iterative extended Euclidean algorithm is used to solve for this value,
1219 /// however we simplify it to speed up calculating only the inverse, and take
1220 /// advantage of div+rem calculations. We also use some tricks to avoid copying
1221 /// (potentially large) APInts around.
1222 /// WARNING: a value of '0' may be returned,
1223 ///          signifying that no multiplicative inverse exists!
1224 APInt APInt::multiplicativeInverse(const APInt& modulo) const {
1225   assert(ult(modulo) && "This APInt must be smaller than the modulo");
1226 
1227   // Using the properties listed at the following web page (accessed 06/21/08):
1228   //   http://www.numbertheory.org/php/euclid.html
1229   // (especially the properties numbered 3, 4 and 9) it can be proved that
1230   // BitWidth bits suffice for all the computations in the algorithm implemented
1231   // below. More precisely, this number of bits suffice if the multiplicative
1232   // inverse exists, but may not suffice for the general extended Euclidean
1233   // algorithm.
1234 
1235   APInt r[2] = { modulo, *this };
1236   APInt t[2] = { APInt(BitWidth, 0), APInt(BitWidth, 1) };
1237   APInt q(BitWidth, 0);
1238 
1239   unsigned i;
1240   for (i = 0; r[i^1] != 0; i ^= 1) {
1241     // An overview of the math without the confusing bit-flipping:
1242     // q = r[i-2] / r[i-1]
1243     // r[i] = r[i-2] % r[i-1]
1244     // t[i] = t[i-2] - t[i-1] * q
1245     udivrem(r[i], r[i^1], q, r[i]);
1246     t[i] -= t[i^1] * q;
1247   }
1248 
1249   // If this APInt and the modulo are not coprime, there is no multiplicative
1250   // inverse, so return 0. We check this by looking at the next-to-last
1251   // remainder, which is the gcd(*this,modulo) as calculated by the Euclidean
1252   // algorithm.
1253   if (r[i] != 1)
1254     return APInt(BitWidth, 0);
1255 
1256   // The next-to-last t is the multiplicative inverse.  However, we are
1257   // interested in a positive inverse. Calculate a positive one from a negative
1258   // one if necessary. A simple addition of the modulo suffices because
1259   // abs(t[i]) is known to be less than *this/2 (see the link above).
1260   if (t[i].isNegative())
1261     t[i] += modulo;
1262 
1263   return std::move(t[i]);
1264 }
1265 
1266 /// Implementation of Knuth's Algorithm D (Division of nonnegative integers)
1267 /// from "Art of Computer Programming, Volume 2", section 4.3.1, p. 272. The
1268 /// variables here have the same names as in the algorithm. Comments explain
1269 /// the algorithm and any deviation from it.
1270 static void KnuthDiv(uint32_t *u, uint32_t *v, uint32_t *q, uint32_t* r,
1271                      unsigned m, unsigned n) {
1272   assert(u && "Must provide dividend");
1273   assert(v && "Must provide divisor");
1274   assert(q && "Must provide quotient");
1275   assert(u != v && u != q && v != q && "Must use different memory");
1276   assert(n>1 && "n must be > 1");
1277 
1278   // b denotes the base of the number system. In our case b is 2^32.
1279   const uint64_t b = uint64_t(1) << 32;
1280 
1281 // The DEBUG macros here tend to be spam in the debug output if you're not
1282 // debugging this code. Disable them unless KNUTH_DEBUG is defined.
1283 #ifdef KNUTH_DEBUG
1284 #define DEBUG_KNUTH(X) LLVM_DEBUG(X)
1285 #else
1286 #define DEBUG_KNUTH(X) do {} while(false)
1287 #endif
1288 
1289   DEBUG_KNUTH(dbgs() << "KnuthDiv: m=" << m << " n=" << n << '\n');
1290   DEBUG_KNUTH(dbgs() << "KnuthDiv: original:");
1291   DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1292   DEBUG_KNUTH(dbgs() << " by");
1293   DEBUG_KNUTH(for (int i = n; i > 0; i--) dbgs() << " " << v[i - 1]);
1294   DEBUG_KNUTH(dbgs() << '\n');
1295   // D1. [Normalize.] Set d = b / (v[n-1] + 1) and multiply all the digits of
1296   // u and v by d. Note that we have taken Knuth's advice here to use a power
1297   // of 2 value for d such that d * v[n-1] >= b/2 (b is the base). A power of
1298   // 2 allows us to shift instead of multiply and it is easy to determine the
1299   // shift amount from the leading zeros.  We are basically normalizing the u
1300   // and v so that its high bits are shifted to the top of v's range without
1301   // overflow. Note that this can require an extra word in u so that u must
1302   // be of length m+n+1.
1303   unsigned shift = countLeadingZeros(v[n-1]);
1304   uint32_t v_carry = 0;
1305   uint32_t u_carry = 0;
1306   if (shift) {
1307     for (unsigned i = 0; i < m+n; ++i) {
1308       uint32_t u_tmp = u[i] >> (32 - shift);
1309       u[i] = (u[i] << shift) | u_carry;
1310       u_carry = u_tmp;
1311     }
1312     for (unsigned i = 0; i < n; ++i) {
1313       uint32_t v_tmp = v[i] >> (32 - shift);
1314       v[i] = (v[i] << shift) | v_carry;
1315       v_carry = v_tmp;
1316     }
1317   }
1318   u[m+n] = u_carry;
1319 
1320   DEBUG_KNUTH(dbgs() << "KnuthDiv:   normal:");
1321   DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1322   DEBUG_KNUTH(dbgs() << " by");
1323   DEBUG_KNUTH(for (int i = n; i > 0; i--) dbgs() << " " << v[i - 1]);
1324   DEBUG_KNUTH(dbgs() << '\n');
1325 
1326   // D2. [Initialize j.]  Set j to m. This is the loop counter over the places.
1327   int j = m;
1328   do {
1329     DEBUG_KNUTH(dbgs() << "KnuthDiv: quotient digit #" << j << '\n');
1330     // D3. [Calculate q'.].
1331     //     Set qp = (u[j+n]*b + u[j+n-1]) / v[n-1]. (qp=qprime=q')
1332     //     Set rp = (u[j+n]*b + u[j+n-1]) % v[n-1]. (rp=rprime=r')
1333     // Now test if qp == b or qp*v[n-2] > b*rp + u[j+n-2]; if so, decrease
1334     // qp by 1, increase rp by v[n-1], and repeat this test if rp < b. The test
1335     // on v[n-2] determines at high speed most of the cases in which the trial
1336     // value qp is one too large, and it eliminates all cases where qp is two
1337     // too large.
1338     uint64_t dividend = Make_64(u[j+n], u[j+n-1]);
1339     DEBUG_KNUTH(dbgs() << "KnuthDiv: dividend == " << dividend << '\n');
1340     uint64_t qp = dividend / v[n-1];
1341     uint64_t rp = dividend % v[n-1];
1342     if (qp == b || qp*v[n-2] > b*rp + u[j+n-2]) {
1343       qp--;
1344       rp += v[n-1];
1345       if (rp < b && (qp == b || qp*v[n-2] > b*rp + u[j+n-2]))
1346         qp--;
1347     }
1348     DEBUG_KNUTH(dbgs() << "KnuthDiv: qp == " << qp << ", rp == " << rp << '\n');
1349 
1350     // D4. [Multiply and subtract.] Replace (u[j+n]u[j+n-1]...u[j]) with
1351     // (u[j+n]u[j+n-1]..u[j]) - qp * (v[n-1]...v[1]v[0]). This computation
1352     // consists of a simple multiplication by a one-place number, combined with
1353     // a subtraction.
1354     // The digits (u[j+n]...u[j]) should be kept positive; if the result of
1355     // this step is actually negative, (u[j+n]...u[j]) should be left as the
1356     // true value plus b**(n+1), namely as the b's complement of
1357     // the true value, and a "borrow" to the left should be remembered.
1358     int64_t borrow = 0;
1359     for (unsigned i = 0; i < n; ++i) {
1360       uint64_t p = uint64_t(qp) * uint64_t(v[i]);
1361       int64_t subres = int64_t(u[j+i]) - borrow - Lo_32(p);
1362       u[j+i] = Lo_32(subres);
1363       borrow = Hi_32(p) - Hi_32(subres);
1364       DEBUG_KNUTH(dbgs() << "KnuthDiv: u[j+i] = " << u[j + i]
1365                         << ", borrow = " << borrow << '\n');
1366     }
1367     bool isNeg = u[j+n] < borrow;
1368     u[j+n] -= Lo_32(borrow);
1369 
1370     DEBUG_KNUTH(dbgs() << "KnuthDiv: after subtraction:");
1371     DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1372     DEBUG_KNUTH(dbgs() << '\n');
1373 
1374     // D5. [Test remainder.] Set q[j] = qp. If the result of step D4 was
1375     // negative, go to step D6; otherwise go on to step D7.
1376     q[j] = Lo_32(qp);
1377     if (isNeg) {
1378       // D6. [Add back]. The probability that this step is necessary is very
1379       // small, on the order of only 2/b. Make sure that test data accounts for
1380       // this possibility. Decrease q[j] by 1
1381       q[j]--;
1382       // and add (0v[n-1]...v[1]v[0]) to (u[j+n]u[j+n-1]...u[j+1]u[j]).
1383       // A carry will occur to the left of u[j+n], and it should be ignored
1384       // since it cancels with the borrow that occurred in D4.
1385       bool carry = false;
1386       for (unsigned i = 0; i < n; i++) {
1387         uint32_t limit = std::min(u[j+i],v[i]);
1388         u[j+i] += v[i] + carry;
1389         carry = u[j+i] < limit || (carry && u[j+i] == limit);
1390       }
1391       u[j+n] += carry;
1392     }
1393     DEBUG_KNUTH(dbgs() << "KnuthDiv: after correction:");
1394     DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1395     DEBUG_KNUTH(dbgs() << "\nKnuthDiv: digit result = " << q[j] << '\n');
1396 
1397     // D7. [Loop on j.]  Decrease j by one. Now if j >= 0, go back to D3.
1398   } while (--j >= 0);
1399 
1400   DEBUG_KNUTH(dbgs() << "KnuthDiv: quotient:");
1401   DEBUG_KNUTH(for (int i = m; i >= 0; i--) dbgs() << " " << q[i]);
1402   DEBUG_KNUTH(dbgs() << '\n');
1403 
1404   // D8. [Unnormalize]. Now q[...] is the desired quotient, and the desired
1405   // remainder may be obtained by dividing u[...] by d. If r is non-null we
1406   // compute the remainder (urem uses this).
1407   if (r) {
1408     // The value d is expressed by the "shift" value above since we avoided
1409     // multiplication by d by using a shift left. So, all we have to do is
1410     // shift right here.
1411     if (shift) {
1412       uint32_t carry = 0;
1413       DEBUG_KNUTH(dbgs() << "KnuthDiv: remainder:");
1414       for (int i = n-1; i >= 0; i--) {
1415         r[i] = (u[i] >> shift) | carry;
1416         carry = u[i] << (32 - shift);
1417         DEBUG_KNUTH(dbgs() << " " << r[i]);
1418       }
1419     } else {
1420       for (int i = n-1; i >= 0; i--) {
1421         r[i] = u[i];
1422         DEBUG_KNUTH(dbgs() << " " << r[i]);
1423       }
1424     }
1425     DEBUG_KNUTH(dbgs() << '\n');
1426   }
1427   DEBUG_KNUTH(dbgs() << '\n');
1428 }
1429 
1430 void APInt::divide(const WordType *LHS, unsigned lhsWords, const WordType *RHS,
1431                    unsigned rhsWords, WordType *Quotient, WordType *Remainder) {
1432   assert(lhsWords >= rhsWords && "Fractional result");
1433 
1434   // First, compose the values into an array of 32-bit words instead of
1435   // 64-bit words. This is a necessity of both the "short division" algorithm
1436   // and the Knuth "classical algorithm" which requires there to be native
1437   // operations for +, -, and * on an m bit value with an m*2 bit result. We
1438   // can't use 64-bit operands here because we don't have native results of
1439   // 128-bits. Furthermore, casting the 64-bit values to 32-bit values won't
1440   // work on large-endian machines.
1441   unsigned n = rhsWords * 2;
1442   unsigned m = (lhsWords * 2) - n;
1443 
1444   // Allocate space for the temporary values we need either on the stack, if
1445   // it will fit, or on the heap if it won't.
1446   uint32_t SPACE[128];
1447   uint32_t *U = nullptr;
1448   uint32_t *V = nullptr;
1449   uint32_t *Q = nullptr;
1450   uint32_t *R = nullptr;
1451   if ((Remainder?4:3)*n+2*m+1 <= 128) {
1452     U = &SPACE[0];
1453     V = &SPACE[m+n+1];
1454     Q = &SPACE[(m+n+1) + n];
1455     if (Remainder)
1456       R = &SPACE[(m+n+1) + n + (m+n)];
1457   } else {
1458     U = new uint32_t[m + n + 1];
1459     V = new uint32_t[n];
1460     Q = new uint32_t[m+n];
1461     if (Remainder)
1462       R = new uint32_t[n];
1463   }
1464 
1465   // Initialize the dividend
1466   memset(U, 0, (m+n+1)*sizeof(uint32_t));
1467   for (unsigned i = 0; i < lhsWords; ++i) {
1468     uint64_t tmp = LHS[i];
1469     U[i * 2] = Lo_32(tmp);
1470     U[i * 2 + 1] = Hi_32(tmp);
1471   }
1472   U[m+n] = 0; // this extra word is for "spill" in the Knuth algorithm.
1473 
1474   // Initialize the divisor
1475   memset(V, 0, (n)*sizeof(uint32_t));
1476   for (unsigned i = 0; i < rhsWords; ++i) {
1477     uint64_t tmp = RHS[i];
1478     V[i * 2] = Lo_32(tmp);
1479     V[i * 2 + 1] = Hi_32(tmp);
1480   }
1481 
1482   // initialize the quotient and remainder
1483   memset(Q, 0, (m+n) * sizeof(uint32_t));
1484   if (Remainder)
1485     memset(R, 0, n * sizeof(uint32_t));
1486 
1487   // Now, adjust m and n for the Knuth division. n is the number of words in
1488   // the divisor. m is the number of words by which the dividend exceeds the
1489   // divisor (i.e. m+n is the length of the dividend). These sizes must not
1490   // contain any zero words or the Knuth algorithm fails.
1491   for (unsigned i = n; i > 0 && V[i-1] == 0; i--) {
1492     n--;
1493     m++;
1494   }
1495   for (unsigned i = m+n; i > 0 && U[i-1] == 0; i--)
1496     m--;
1497 
1498   // If we're left with only a single word for the divisor, Knuth doesn't work
1499   // so we implement the short division algorithm here. This is much simpler
1500   // and faster because we are certain that we can divide a 64-bit quantity
1501   // by a 32-bit quantity at hardware speed and short division is simply a
1502   // series of such operations. This is just like doing short division but we
1503   // are using base 2^32 instead of base 10.
1504   assert(n != 0 && "Divide by zero?");
1505   if (n == 1) {
1506     uint32_t divisor = V[0];
1507     uint32_t remainder = 0;
1508     for (int i = m; i >= 0; i--) {
1509       uint64_t partial_dividend = Make_64(remainder, U[i]);
1510       if (partial_dividend == 0) {
1511         Q[i] = 0;
1512         remainder = 0;
1513       } else if (partial_dividend < divisor) {
1514         Q[i] = 0;
1515         remainder = Lo_32(partial_dividend);
1516       } else if (partial_dividend == divisor) {
1517         Q[i] = 1;
1518         remainder = 0;
1519       } else {
1520         Q[i] = Lo_32(partial_dividend / divisor);
1521         remainder = Lo_32(partial_dividend - (Q[i] * divisor));
1522       }
1523     }
1524     if (R)
1525       R[0] = remainder;
1526   } else {
1527     // Now we're ready to invoke the Knuth classical divide algorithm. In this
1528     // case n > 1.
1529     KnuthDiv(U, V, Q, R, m, n);
1530   }
1531 
1532   // If the caller wants the quotient
1533   if (Quotient) {
1534     for (unsigned i = 0; i < lhsWords; ++i)
1535       Quotient[i] = Make_64(Q[i*2+1], Q[i*2]);
1536   }
1537 
1538   // If the caller wants the remainder
1539   if (Remainder) {
1540     for (unsigned i = 0; i < rhsWords; ++i)
1541       Remainder[i] = Make_64(R[i*2+1], R[i*2]);
1542   }
1543 
1544   // Clean up the memory we allocated.
1545   if (U != &SPACE[0]) {
1546     delete [] U;
1547     delete [] V;
1548     delete [] Q;
1549     delete [] R;
1550   }
1551 }
1552 
1553 APInt APInt::udiv(const APInt &RHS) const {
1554   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
1555 
1556   // First, deal with the easy case
1557   if (isSingleWord()) {
1558     assert(RHS.U.VAL != 0 && "Divide by zero?");
1559     return APInt(BitWidth, U.VAL / RHS.U.VAL);
1560   }
1561 
1562   // Get some facts about the LHS and RHS number of bits and words
1563   unsigned lhsWords = getNumWords(getActiveBits());
1564   unsigned rhsBits  = RHS.getActiveBits();
1565   unsigned rhsWords = getNumWords(rhsBits);
1566   assert(rhsWords && "Divided by zero???");
1567 
1568   // Deal with some degenerate cases
1569   if (!lhsWords)
1570     // 0 / X ===> 0
1571     return APInt(BitWidth, 0);
1572   if (rhsBits == 1)
1573     // X / 1 ===> X
1574     return *this;
1575   if (lhsWords < rhsWords || this->ult(RHS))
1576     // X / Y ===> 0, iff X < Y
1577     return APInt(BitWidth, 0);
1578   if (*this == RHS)
1579     // X / X ===> 1
1580     return APInt(BitWidth, 1);
1581   if (lhsWords == 1) // rhsWords is 1 if lhsWords is 1.
1582     // All high words are zero, just use native divide
1583     return APInt(BitWidth, this->U.pVal[0] / RHS.U.pVal[0]);
1584 
1585   // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1586   APInt Quotient(BitWidth, 0); // to hold result.
1587   divide(U.pVal, lhsWords, RHS.U.pVal, rhsWords, Quotient.U.pVal, nullptr);
1588   return Quotient;
1589 }
1590 
1591 APInt APInt::udiv(uint64_t RHS) const {
1592   assert(RHS != 0 && "Divide by zero?");
1593 
1594   // First, deal with the easy case
1595   if (isSingleWord())
1596     return APInt(BitWidth, U.VAL / RHS);
1597 
1598   // Get some facts about the LHS words.
1599   unsigned lhsWords = getNumWords(getActiveBits());
1600 
1601   // Deal with some degenerate cases
1602   if (!lhsWords)
1603     // 0 / X ===> 0
1604     return APInt(BitWidth, 0);
1605   if (RHS == 1)
1606     // X / 1 ===> X
1607     return *this;
1608   if (this->ult(RHS))
1609     // X / Y ===> 0, iff X < Y
1610     return APInt(BitWidth, 0);
1611   if (*this == RHS)
1612     // X / X ===> 1
1613     return APInt(BitWidth, 1);
1614   if (lhsWords == 1) // rhsWords is 1 if lhsWords is 1.
1615     // All high words are zero, just use native divide
1616     return APInt(BitWidth, this->U.pVal[0] / RHS);
1617 
1618   // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1619   APInt Quotient(BitWidth, 0); // to hold result.
1620   divide(U.pVal, lhsWords, &RHS, 1, Quotient.U.pVal, nullptr);
1621   return Quotient;
1622 }
1623 
1624 APInt APInt::sdiv(const APInt &RHS) const {
1625   if (isNegative()) {
1626     if (RHS.isNegative())
1627       return (-(*this)).udiv(-RHS);
1628     return -((-(*this)).udiv(RHS));
1629   }
1630   if (RHS.isNegative())
1631     return -(this->udiv(-RHS));
1632   return this->udiv(RHS);
1633 }
1634 
1635 APInt APInt::sdiv(int64_t RHS) const {
1636   if (isNegative()) {
1637     if (RHS < 0)
1638       return (-(*this)).udiv(-RHS);
1639     return -((-(*this)).udiv(RHS));
1640   }
1641   if (RHS < 0)
1642     return -(this->udiv(-RHS));
1643   return this->udiv(RHS);
1644 }
1645 
1646 APInt APInt::urem(const APInt &RHS) const {
1647   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
1648   if (isSingleWord()) {
1649     assert(RHS.U.VAL != 0 && "Remainder by zero?");
1650     return APInt(BitWidth, U.VAL % RHS.U.VAL);
1651   }
1652 
1653   // Get some facts about the LHS
1654   unsigned lhsWords = getNumWords(getActiveBits());
1655 
1656   // Get some facts about the RHS
1657   unsigned rhsBits = RHS.getActiveBits();
1658   unsigned rhsWords = getNumWords(rhsBits);
1659   assert(rhsWords && "Performing remainder operation by zero ???");
1660 
1661   // Check the degenerate cases
1662   if (lhsWords == 0)
1663     // 0 % Y ===> 0
1664     return APInt(BitWidth, 0);
1665   if (rhsBits == 1)
1666     // X % 1 ===> 0
1667     return APInt(BitWidth, 0);
1668   if (lhsWords < rhsWords || this->ult(RHS))
1669     // X % Y ===> X, iff X < Y
1670     return *this;
1671   if (*this == RHS)
1672     // X % X == 0;
1673     return APInt(BitWidth, 0);
1674   if (lhsWords == 1)
1675     // All high words are zero, just use native remainder
1676     return APInt(BitWidth, U.pVal[0] % RHS.U.pVal[0]);
1677 
1678   // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1679   APInt Remainder(BitWidth, 0);
1680   divide(U.pVal, lhsWords, RHS.U.pVal, rhsWords, nullptr, Remainder.U.pVal);
1681   return Remainder;
1682 }
1683 
1684 uint64_t APInt::urem(uint64_t RHS) const {
1685   assert(RHS != 0 && "Remainder by zero?");
1686 
1687   if (isSingleWord())
1688     return U.VAL % RHS;
1689 
1690   // Get some facts about the LHS
1691   unsigned lhsWords = getNumWords(getActiveBits());
1692 
1693   // Check the degenerate cases
1694   if (lhsWords == 0)
1695     // 0 % Y ===> 0
1696     return 0;
1697   if (RHS == 1)
1698     // X % 1 ===> 0
1699     return 0;
1700   if (this->ult(RHS))
1701     // X % Y ===> X, iff X < Y
1702     return getZExtValue();
1703   if (*this == RHS)
1704     // X % X == 0;
1705     return 0;
1706   if (lhsWords == 1)
1707     // All high words are zero, just use native remainder
1708     return U.pVal[0] % RHS;
1709 
1710   // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1711   uint64_t Remainder;
1712   divide(U.pVal, lhsWords, &RHS, 1, nullptr, &Remainder);
1713   return Remainder;
1714 }
1715 
1716 APInt APInt::srem(const APInt &RHS) const {
1717   if (isNegative()) {
1718     if (RHS.isNegative())
1719       return -((-(*this)).urem(-RHS));
1720     return -((-(*this)).urem(RHS));
1721   }
1722   if (RHS.isNegative())
1723     return this->urem(-RHS);
1724   return this->urem(RHS);
1725 }
1726 
1727 int64_t APInt::srem(int64_t RHS) const {
1728   if (isNegative()) {
1729     if (RHS < 0)
1730       return -((-(*this)).urem(-RHS));
1731     return -((-(*this)).urem(RHS));
1732   }
1733   if (RHS < 0)
1734     return this->urem(-RHS);
1735   return this->urem(RHS);
1736 }
1737 
1738 void APInt::udivrem(const APInt &LHS, const APInt &RHS,
1739                     APInt &Quotient, APInt &Remainder) {
1740   assert(LHS.BitWidth == RHS.BitWidth && "Bit widths must be the same");
1741   unsigned BitWidth = LHS.BitWidth;
1742 
1743   // First, deal with the easy case
1744   if (LHS.isSingleWord()) {
1745     assert(RHS.U.VAL != 0 && "Divide by zero?");
1746     uint64_t QuotVal = LHS.U.VAL / RHS.U.VAL;
1747     uint64_t RemVal = LHS.U.VAL % RHS.U.VAL;
1748     Quotient = APInt(BitWidth, QuotVal);
1749     Remainder = APInt(BitWidth, RemVal);
1750     return;
1751   }
1752 
1753   // Get some size facts about the dividend and divisor
1754   unsigned lhsWords = getNumWords(LHS.getActiveBits());
1755   unsigned rhsBits  = RHS.getActiveBits();
1756   unsigned rhsWords = getNumWords(rhsBits);
1757   assert(rhsWords && "Performing divrem operation by zero ???");
1758 
1759   // Check the degenerate cases
1760   if (lhsWords == 0) {
1761     Quotient = APInt(BitWidth, 0);    // 0 / Y ===> 0
1762     Remainder = APInt(BitWidth, 0);   // 0 % Y ===> 0
1763     return;
1764   }
1765 
1766   if (rhsBits == 1) {
1767     Quotient = LHS;                   // X / 1 ===> X
1768     Remainder = APInt(BitWidth, 0);   // X % 1 ===> 0
1769   }
1770 
1771   if (lhsWords < rhsWords || LHS.ult(RHS)) {
1772     Remainder = LHS;                  // X % Y ===> X, iff X < Y
1773     Quotient = APInt(BitWidth, 0);    // X / Y ===> 0, iff X < Y
1774     return;
1775   }
1776 
1777   if (LHS == RHS) {
1778     Quotient  = APInt(BitWidth, 1);   // X / X ===> 1
1779     Remainder = APInt(BitWidth, 0);   // X % X ===> 0;
1780     return;
1781   }
1782 
1783   // Make sure there is enough space to hold the results.
1784   // NOTE: This assumes that reallocate won't affect any bits if it doesn't
1785   // change the size. This is necessary if Quotient or Remainder is aliased
1786   // with LHS or RHS.
1787   Quotient.reallocate(BitWidth);
1788   Remainder.reallocate(BitWidth);
1789 
1790   if (lhsWords == 1) { // rhsWords is 1 if lhsWords is 1.
1791     // There is only one word to consider so use the native versions.
1792     uint64_t lhsValue = LHS.U.pVal[0];
1793     uint64_t rhsValue = RHS.U.pVal[0];
1794     Quotient = lhsValue / rhsValue;
1795     Remainder = lhsValue % rhsValue;
1796     return;
1797   }
1798 
1799   // Okay, lets do it the long way
1800   divide(LHS.U.pVal, lhsWords, RHS.U.pVal, rhsWords, Quotient.U.pVal,
1801          Remainder.U.pVal);
1802   // Clear the rest of the Quotient and Remainder.
1803   std::memset(Quotient.U.pVal + lhsWords, 0,
1804               (getNumWords(BitWidth) - lhsWords) * APINT_WORD_SIZE);
1805   std::memset(Remainder.U.pVal + rhsWords, 0,
1806               (getNumWords(BitWidth) - rhsWords) * APINT_WORD_SIZE);
1807 }
1808 
1809 void APInt::udivrem(const APInt &LHS, uint64_t RHS, APInt &Quotient,
1810                     uint64_t &Remainder) {
1811   assert(RHS != 0 && "Divide by zero?");
1812   unsigned BitWidth = LHS.BitWidth;
1813 
1814   // First, deal with the easy case
1815   if (LHS.isSingleWord()) {
1816     uint64_t QuotVal = LHS.U.VAL / RHS;
1817     Remainder = LHS.U.VAL % RHS;
1818     Quotient = APInt(BitWidth, QuotVal);
1819     return;
1820   }
1821 
1822   // Get some size facts about the dividend and divisor
1823   unsigned lhsWords = getNumWords(LHS.getActiveBits());
1824 
1825   // Check the degenerate cases
1826   if (lhsWords == 0) {
1827     Quotient = APInt(BitWidth, 0);    // 0 / Y ===> 0
1828     Remainder = 0;                    // 0 % Y ===> 0
1829     return;
1830   }
1831 
1832   if (RHS == 1) {
1833     Quotient = LHS;                   // X / 1 ===> X
1834     Remainder = 0;                    // X % 1 ===> 0
1835     return;
1836   }
1837 
1838   if (LHS.ult(RHS)) {
1839     Remainder = LHS.getZExtValue();   // X % Y ===> X, iff X < Y
1840     Quotient = APInt(BitWidth, 0);    // X / Y ===> 0, iff X < Y
1841     return;
1842   }
1843 
1844   if (LHS == RHS) {
1845     Quotient  = APInt(BitWidth, 1);   // X / X ===> 1
1846     Remainder = 0;                    // X % X ===> 0;
1847     return;
1848   }
1849 
1850   // Make sure there is enough space to hold the results.
1851   // NOTE: This assumes that reallocate won't affect any bits if it doesn't
1852   // change the size. This is necessary if Quotient is aliased with LHS.
1853   Quotient.reallocate(BitWidth);
1854 
1855   if (lhsWords == 1) { // rhsWords is 1 if lhsWords is 1.
1856     // There is only one word to consider so use the native versions.
1857     uint64_t lhsValue = LHS.U.pVal[0];
1858     Quotient = lhsValue / RHS;
1859     Remainder = lhsValue % RHS;
1860     return;
1861   }
1862 
1863   // Okay, lets do it the long way
1864   divide(LHS.U.pVal, lhsWords, &RHS, 1, Quotient.U.pVal, &Remainder);
1865   // Clear the rest of the Quotient.
1866   std::memset(Quotient.U.pVal + lhsWords, 0,
1867               (getNumWords(BitWidth) - lhsWords) * APINT_WORD_SIZE);
1868 }
1869 
1870 void APInt::sdivrem(const APInt &LHS, const APInt &RHS,
1871                     APInt &Quotient, APInt &Remainder) {
1872   if (LHS.isNegative()) {
1873     if (RHS.isNegative())
1874       APInt::udivrem(-LHS, -RHS, Quotient, Remainder);
1875     else {
1876       APInt::udivrem(-LHS, RHS, Quotient, Remainder);
1877       Quotient.negate();
1878     }
1879     Remainder.negate();
1880   } else if (RHS.isNegative()) {
1881     APInt::udivrem(LHS, -RHS, Quotient, Remainder);
1882     Quotient.negate();
1883   } else {
1884     APInt::udivrem(LHS, RHS, Quotient, Remainder);
1885   }
1886 }
1887 
1888 void APInt::sdivrem(const APInt &LHS, int64_t RHS,
1889                     APInt &Quotient, int64_t &Remainder) {
1890   uint64_t R = Remainder;
1891   if (LHS.isNegative()) {
1892     if (RHS < 0)
1893       APInt::udivrem(-LHS, -RHS, Quotient, R);
1894     else {
1895       APInt::udivrem(-LHS, RHS, Quotient, R);
1896       Quotient.negate();
1897     }
1898     R = -R;
1899   } else if (RHS < 0) {
1900     APInt::udivrem(LHS, -RHS, Quotient, R);
1901     Quotient.negate();
1902   } else {
1903     APInt::udivrem(LHS, RHS, Quotient, R);
1904   }
1905   Remainder = R;
1906 }
1907 
1908 APInt APInt::sadd_ov(const APInt &RHS, bool &Overflow) const {
1909   APInt Res = *this+RHS;
1910   Overflow = isNonNegative() == RHS.isNonNegative() &&
1911              Res.isNonNegative() != isNonNegative();
1912   return Res;
1913 }
1914 
1915 APInt APInt::uadd_ov(const APInt &RHS, bool &Overflow) const {
1916   APInt Res = *this+RHS;
1917   Overflow = Res.ult(RHS);
1918   return Res;
1919 }
1920 
1921 APInt APInt::ssub_ov(const APInt &RHS, bool &Overflow) const {
1922   APInt Res = *this - RHS;
1923   Overflow = isNonNegative() != RHS.isNonNegative() &&
1924              Res.isNonNegative() != isNonNegative();
1925   return Res;
1926 }
1927 
1928 APInt APInt::usub_ov(const APInt &RHS, bool &Overflow) const {
1929   APInt Res = *this-RHS;
1930   Overflow = Res.ugt(*this);
1931   return Res;
1932 }
1933 
1934 APInt APInt::sdiv_ov(const APInt &RHS, bool &Overflow) const {
1935   // MININT/-1  -->  overflow.
1936   Overflow = isMinSignedValue() && RHS.isAllOnesValue();
1937   return sdiv(RHS);
1938 }
1939 
1940 APInt APInt::smul_ov(const APInt &RHS, bool &Overflow) const {
1941   APInt Res = *this * RHS;
1942 
1943   if (*this != 0 && RHS != 0)
1944     Overflow = Res.sdiv(RHS) != *this || Res.sdiv(*this) != RHS;
1945   else
1946     Overflow = false;
1947   return Res;
1948 }
1949 
1950 APInt APInt::umul_ov(const APInt &RHS, bool &Overflow) const {
1951   if (countLeadingZeros() + RHS.countLeadingZeros() + 2 <= BitWidth) {
1952     Overflow = true;
1953     return *this * RHS;
1954   }
1955 
1956   APInt Res = lshr(1) * RHS;
1957   Overflow = Res.isNegative();
1958   Res <<= 1;
1959   if ((*this)[0]) {
1960     Res += RHS;
1961     if (Res.ult(RHS))
1962       Overflow = true;
1963   }
1964   return Res;
1965 }
1966 
1967 APInt APInt::sshl_ov(const APInt &ShAmt, bool &Overflow) const {
1968   Overflow = ShAmt.uge(getBitWidth());
1969   if (Overflow)
1970     return APInt(BitWidth, 0);
1971 
1972   if (isNonNegative()) // Don't allow sign change.
1973     Overflow = ShAmt.uge(countLeadingZeros());
1974   else
1975     Overflow = ShAmt.uge(countLeadingOnes());
1976 
1977   return *this << ShAmt;
1978 }
1979 
1980 APInt APInt::ushl_ov(const APInt &ShAmt, bool &Overflow) const {
1981   Overflow = ShAmt.uge(getBitWidth());
1982   if (Overflow)
1983     return APInt(BitWidth, 0);
1984 
1985   Overflow = ShAmt.ugt(countLeadingZeros());
1986 
1987   return *this << ShAmt;
1988 }
1989 
1990 APInt APInt::sadd_sat(const APInt &RHS) const {
1991   bool Overflow;
1992   APInt Res = sadd_ov(RHS, Overflow);
1993   if (!Overflow)
1994     return Res;
1995 
1996   return isNegative() ? APInt::getSignedMinValue(BitWidth)
1997                       : APInt::getSignedMaxValue(BitWidth);
1998 }
1999 
2000 APInt APInt::uadd_sat(const APInt &RHS) const {
2001   bool Overflow;
2002   APInt Res = uadd_ov(RHS, Overflow);
2003   if (!Overflow)
2004     return Res;
2005 
2006   return APInt::getMaxValue(BitWidth);
2007 }
2008 
2009 APInt APInt::ssub_sat(const APInt &RHS) const {
2010   bool Overflow;
2011   APInt Res = ssub_ov(RHS, Overflow);
2012   if (!Overflow)
2013     return Res;
2014 
2015   return isNegative() ? APInt::getSignedMinValue(BitWidth)
2016                       : APInt::getSignedMaxValue(BitWidth);
2017 }
2018 
2019 APInt APInt::usub_sat(const APInt &RHS) const {
2020   bool Overflow;
2021   APInt Res = usub_ov(RHS, Overflow);
2022   if (!Overflow)
2023     return Res;
2024 
2025   return APInt(BitWidth, 0);
2026 }
2027 
2028 APInt APInt::smul_sat(const APInt &RHS) const {
2029   bool Overflow;
2030   APInt Res = smul_ov(RHS, Overflow);
2031   if (!Overflow)
2032     return Res;
2033 
2034   // The result is negative if one and only one of inputs is negative.
2035   bool ResIsNegative = isNegative() ^ RHS.isNegative();
2036 
2037   return ResIsNegative ? APInt::getSignedMinValue(BitWidth)
2038                        : APInt::getSignedMaxValue(BitWidth);
2039 }
2040 
2041 APInt APInt::umul_sat(const APInt &RHS) const {
2042   bool Overflow;
2043   APInt Res = umul_ov(RHS, Overflow);
2044   if (!Overflow)
2045     return Res;
2046 
2047   return APInt::getMaxValue(BitWidth);
2048 }
2049 
2050 APInt APInt::sshl_sat(const APInt &RHS) const {
2051   bool Overflow;
2052   APInt Res = sshl_ov(RHS, Overflow);
2053   if (!Overflow)
2054     return Res;
2055 
2056   return isNegative() ? APInt::getSignedMinValue(BitWidth)
2057                       : APInt::getSignedMaxValue(BitWidth);
2058 }
2059 
2060 APInt APInt::ushl_sat(const APInt &RHS) const {
2061   bool Overflow;
2062   APInt Res = ushl_ov(RHS, Overflow);
2063   if (!Overflow)
2064     return Res;
2065 
2066   return APInt::getMaxValue(BitWidth);
2067 }
2068 
2069 void APInt::fromString(unsigned numbits, StringRef str, uint8_t radix) {
2070   // Check our assumptions here
2071   assert(!str.empty() && "Invalid string length");
2072   assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 ||
2073           radix == 36) &&
2074          "Radix should be 2, 8, 10, 16, or 36!");
2075 
2076   StringRef::iterator p = str.begin();
2077   size_t slen = str.size();
2078   bool isNeg = *p == '-';
2079   if (*p == '-' || *p == '+') {
2080     p++;
2081     slen--;
2082     assert(slen && "String is only a sign, needs a value.");
2083   }
2084   assert((slen <= numbits || radix != 2) && "Insufficient bit width");
2085   assert(((slen-1)*3 <= numbits || radix != 8) && "Insufficient bit width");
2086   assert(((slen-1)*4 <= numbits || radix != 16) && "Insufficient bit width");
2087   assert((((slen-1)*64)/22 <= numbits || radix != 10) &&
2088          "Insufficient bit width");
2089 
2090   // Allocate memory if needed
2091   if (isSingleWord())
2092     U.VAL = 0;
2093   else
2094     U.pVal = getClearedMemory(getNumWords());
2095 
2096   // Figure out if we can shift instead of multiply
2097   unsigned shift = (radix == 16 ? 4 : radix == 8 ? 3 : radix == 2 ? 1 : 0);
2098 
2099   // Enter digit traversal loop
2100   for (StringRef::iterator e = str.end(); p != e; ++p) {
2101     unsigned digit = getDigit(*p, radix);
2102     assert(digit < radix && "Invalid character in digit string");
2103 
2104     // Shift or multiply the value by the radix
2105     if (slen > 1) {
2106       if (shift)
2107         *this <<= shift;
2108       else
2109         *this *= radix;
2110     }
2111 
2112     // Add in the digit we just interpreted
2113     *this += digit;
2114   }
2115   // If its negative, put it in two's complement form
2116   if (isNeg)
2117     this->negate();
2118 }
2119 
2120 void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix,
2121                      bool Signed, bool formatAsCLiteral) const {
2122   assert((Radix == 10 || Radix == 8 || Radix == 16 || Radix == 2 ||
2123           Radix == 36) &&
2124          "Radix should be 2, 8, 10, 16, or 36!");
2125 
2126   const char *Prefix = "";
2127   if (formatAsCLiteral) {
2128     switch (Radix) {
2129       case 2:
2130         // Binary literals are a non-standard extension added in gcc 4.3:
2131         // http://gcc.gnu.org/onlinedocs/gcc-4.3.0/gcc/Binary-constants.html
2132         Prefix = "0b";
2133         break;
2134       case 8:
2135         Prefix = "0";
2136         break;
2137       case 10:
2138         break; // No prefix
2139       case 16:
2140         Prefix = "0x";
2141         break;
2142       default:
2143         llvm_unreachable("Invalid radix!");
2144     }
2145   }
2146 
2147   // First, check for a zero value and just short circuit the logic below.
2148   if (*this == 0) {
2149     while (*Prefix) {
2150       Str.push_back(*Prefix);
2151       ++Prefix;
2152     };
2153     Str.push_back('0');
2154     return;
2155   }
2156 
2157   static const char Digits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
2158 
2159   if (isSingleWord()) {
2160     char Buffer[65];
2161     char *BufPtr = std::end(Buffer);
2162 
2163     uint64_t N;
2164     if (!Signed) {
2165       N = getZExtValue();
2166     } else {
2167       int64_t I = getSExtValue();
2168       if (I >= 0) {
2169         N = I;
2170       } else {
2171         Str.push_back('-');
2172         N = -(uint64_t)I;
2173       }
2174     }
2175 
2176     while (*Prefix) {
2177       Str.push_back(*Prefix);
2178       ++Prefix;
2179     };
2180 
2181     while (N) {
2182       *--BufPtr = Digits[N % Radix];
2183       N /= Radix;
2184     }
2185     Str.append(BufPtr, std::end(Buffer));
2186     return;
2187   }
2188 
2189   APInt Tmp(*this);
2190 
2191   if (Signed && isNegative()) {
2192     // They want to print the signed version and it is a negative value
2193     // Flip the bits and add one to turn it into the equivalent positive
2194     // value and put a '-' in the result.
2195     Tmp.negate();
2196     Str.push_back('-');
2197   }
2198 
2199   while (*Prefix) {
2200     Str.push_back(*Prefix);
2201     ++Prefix;
2202   };
2203 
2204   // We insert the digits backward, then reverse them to get the right order.
2205   unsigned StartDig = Str.size();
2206 
2207   // For the 2, 8 and 16 bit cases, we can just shift instead of divide
2208   // because the number of bits per digit (1, 3 and 4 respectively) divides
2209   // equally.  We just shift until the value is zero.
2210   if (Radix == 2 || Radix == 8 || Radix == 16) {
2211     // Just shift tmp right for each digit width until it becomes zero
2212     unsigned ShiftAmt = (Radix == 16 ? 4 : (Radix == 8 ? 3 : 1));
2213     unsigned MaskAmt = Radix - 1;
2214 
2215     while (Tmp.getBoolValue()) {
2216       unsigned Digit = unsigned(Tmp.getRawData()[0]) & MaskAmt;
2217       Str.push_back(Digits[Digit]);
2218       Tmp.lshrInPlace(ShiftAmt);
2219     }
2220   } else {
2221     while (Tmp.getBoolValue()) {
2222       uint64_t Digit;
2223       udivrem(Tmp, Radix, Tmp, Digit);
2224       assert(Digit < Radix && "divide failed");
2225       Str.push_back(Digits[Digit]);
2226     }
2227   }
2228 
2229   // Reverse the digits before returning.
2230   std::reverse(Str.begin()+StartDig, Str.end());
2231 }
2232 
2233 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2234 LLVM_DUMP_METHOD void APInt::dump() const {
2235   SmallString<40> S, U;
2236   this->toStringUnsigned(U);
2237   this->toStringSigned(S);
2238   dbgs() << "APInt(" << BitWidth << "b, "
2239          << U << "u " << S << "s)\n";
2240 }
2241 #endif
2242 
2243 void APInt::print(raw_ostream &OS, bool isSigned) const {
2244   SmallString<40> S;
2245   this->toString(S, 10, isSigned, /* formatAsCLiteral = */false);
2246   OS << S;
2247 }
2248 
2249 // This implements a variety of operations on a representation of
2250 // arbitrary precision, two's-complement, bignum integer values.
2251 
2252 // Assumed by lowHalf, highHalf, partMSB and partLSB.  A fairly safe
2253 // and unrestricting assumption.
2254 static_assert(APInt::APINT_BITS_PER_WORD % 2 == 0,
2255               "Part width must be divisible by 2!");
2256 
2257 // Returns the integer part with the least significant BITS set.
2258 // BITS cannot be zero.
2259 static inline APInt::WordType lowBitMask(unsigned bits) {
2260   assert(bits != 0 && bits <= APInt::APINT_BITS_PER_WORD);
2261   return ~(APInt::WordType) 0 >> (APInt::APINT_BITS_PER_WORD - bits);
2262 }
2263 
2264 /// Returns the value of the lower half of PART.
2265 static inline APInt::WordType lowHalf(APInt::WordType part) {
2266   return part & lowBitMask(APInt::APINT_BITS_PER_WORD / 2);
2267 }
2268 
2269 /// Returns the value of the upper half of PART.
2270 static inline APInt::WordType highHalf(APInt::WordType part) {
2271   return part >> (APInt::APINT_BITS_PER_WORD / 2);
2272 }
2273 
2274 /// Returns the bit number of the most significant set bit of a part.
2275 /// If the input number has no bits set -1U is returned.
2276 static unsigned partMSB(APInt::WordType value) {
2277   return findLastSet(value, ZB_Max);
2278 }
2279 
2280 /// Returns the bit number of the least significant set bit of a part.  If the
2281 /// input number has no bits set -1U is returned.
2282 static unsigned partLSB(APInt::WordType value) {
2283   return findFirstSet(value, ZB_Max);
2284 }
2285 
2286 /// Sets the least significant part of a bignum to the input value, and zeroes
2287 /// out higher parts.
2288 void APInt::tcSet(WordType *dst, WordType part, unsigned parts) {
2289   assert(parts > 0);
2290   dst[0] = part;
2291   for (unsigned i = 1; i < parts; i++)
2292     dst[i] = 0;
2293 }
2294 
2295 /// Assign one bignum to another.
2296 void APInt::tcAssign(WordType *dst, const WordType *src, unsigned parts) {
2297   for (unsigned i = 0; i < parts; i++)
2298     dst[i] = src[i];
2299 }
2300 
2301 /// Returns true if a bignum is zero, false otherwise.
2302 bool APInt::tcIsZero(const WordType *src, unsigned parts) {
2303   for (unsigned i = 0; i < parts; i++)
2304     if (src[i])
2305       return false;
2306 
2307   return true;
2308 }
2309 
2310 /// Extract the given bit of a bignum; returns 0 or 1.
2311 int APInt::tcExtractBit(const WordType *parts, unsigned bit) {
2312   return (parts[whichWord(bit)] & maskBit(bit)) != 0;
2313 }
2314 
2315 /// Set the given bit of a bignum.
2316 void APInt::tcSetBit(WordType *parts, unsigned bit) {
2317   parts[whichWord(bit)] |= maskBit(bit);
2318 }
2319 
2320 /// Clears the given bit of a bignum.
2321 void APInt::tcClearBit(WordType *parts, unsigned bit) {
2322   parts[whichWord(bit)] &= ~maskBit(bit);
2323 }
2324 
2325 /// Returns the bit number of the least significant set bit of a number.  If the
2326 /// input number has no bits set -1U is returned.
2327 unsigned APInt::tcLSB(const WordType *parts, unsigned n) {
2328   for (unsigned i = 0; i < n; i++) {
2329     if (parts[i] != 0) {
2330       unsigned lsb = partLSB(parts[i]);
2331       return lsb + i * APINT_BITS_PER_WORD;
2332     }
2333   }
2334 
2335   return -1U;
2336 }
2337 
2338 /// Returns the bit number of the most significant set bit of a number.
2339 /// If the input number has no bits set -1U is returned.
2340 unsigned APInt::tcMSB(const WordType *parts, unsigned n) {
2341   do {
2342     --n;
2343 
2344     if (parts[n] != 0) {
2345       unsigned msb = partMSB(parts[n]);
2346 
2347       return msb + n * APINT_BITS_PER_WORD;
2348     }
2349   } while (n);
2350 
2351   return -1U;
2352 }
2353 
2354 /// Copy the bit vector of width srcBITS from SRC, starting at bit srcLSB, to
2355 /// DST, of dstCOUNT parts, such that the bit srcLSB becomes the least
2356 /// significant bit of DST.  All high bits above srcBITS in DST are zero-filled.
2357 /// */
2358 void
2359 APInt::tcExtract(WordType *dst, unsigned dstCount, const WordType *src,
2360                  unsigned srcBits, unsigned srcLSB) {
2361   unsigned dstParts = (srcBits + APINT_BITS_PER_WORD - 1) / APINT_BITS_PER_WORD;
2362   assert(dstParts <= dstCount);
2363 
2364   unsigned firstSrcPart = srcLSB / APINT_BITS_PER_WORD;
2365   tcAssign(dst, src + firstSrcPart, dstParts);
2366 
2367   unsigned shift = srcLSB % APINT_BITS_PER_WORD;
2368   tcShiftRight(dst, dstParts, shift);
2369 
2370   // We now have (dstParts * APINT_BITS_PER_WORD - shift) bits from SRC
2371   // in DST.  If this is less that srcBits, append the rest, else
2372   // clear the high bits.
2373   unsigned n = dstParts * APINT_BITS_PER_WORD - shift;
2374   if (n < srcBits) {
2375     WordType mask = lowBitMask (srcBits - n);
2376     dst[dstParts - 1] |= ((src[firstSrcPart + dstParts] & mask)
2377                           << n % APINT_BITS_PER_WORD);
2378   } else if (n > srcBits) {
2379     if (srcBits % APINT_BITS_PER_WORD)
2380       dst[dstParts - 1] &= lowBitMask (srcBits % APINT_BITS_PER_WORD);
2381   }
2382 
2383   // Clear high parts.
2384   while (dstParts < dstCount)
2385     dst[dstParts++] = 0;
2386 }
2387 
2388 //// DST += RHS + C where C is zero or one.  Returns the carry flag.
2389 APInt::WordType APInt::tcAdd(WordType *dst, const WordType *rhs,
2390                              WordType c, unsigned parts) {
2391   assert(c <= 1);
2392 
2393   for (unsigned i = 0; i < parts; i++) {
2394     WordType l = dst[i];
2395     if (c) {
2396       dst[i] += rhs[i] + 1;
2397       c = (dst[i] <= l);
2398     } else {
2399       dst[i] += rhs[i];
2400       c = (dst[i] < l);
2401     }
2402   }
2403 
2404   return c;
2405 }
2406 
2407 /// This function adds a single "word" integer, src, to the multiple
2408 /// "word" integer array, dst[]. dst[] is modified to reflect the addition and
2409 /// 1 is returned if there is a carry out, otherwise 0 is returned.
2410 /// @returns the carry of the addition.
2411 APInt::WordType APInt::tcAddPart(WordType *dst, WordType src,
2412                                  unsigned parts) {
2413   for (unsigned i = 0; i < parts; ++i) {
2414     dst[i] += src;
2415     if (dst[i] >= src)
2416       return 0; // No need to carry so exit early.
2417     src = 1; // Carry one to next digit.
2418   }
2419 
2420   return 1;
2421 }
2422 
2423 /// DST -= RHS + C where C is zero or one.  Returns the carry flag.
2424 APInt::WordType APInt::tcSubtract(WordType *dst, const WordType *rhs,
2425                                   WordType c, unsigned parts) {
2426   assert(c <= 1);
2427 
2428   for (unsigned i = 0; i < parts; i++) {
2429     WordType l = dst[i];
2430     if (c) {
2431       dst[i] -= rhs[i] + 1;
2432       c = (dst[i] >= l);
2433     } else {
2434       dst[i] -= rhs[i];
2435       c = (dst[i] > l);
2436     }
2437   }
2438 
2439   return c;
2440 }
2441 
2442 /// This function subtracts a single "word" (64-bit word), src, from
2443 /// the multi-word integer array, dst[], propagating the borrowed 1 value until
2444 /// no further borrowing is needed or it runs out of "words" in dst.  The result
2445 /// is 1 if "borrowing" exhausted the digits in dst, or 0 if dst was not
2446 /// exhausted. In other words, if src > dst then this function returns 1,
2447 /// otherwise 0.
2448 /// @returns the borrow out of the subtraction
2449 APInt::WordType APInt::tcSubtractPart(WordType *dst, WordType src,
2450                                       unsigned parts) {
2451   for (unsigned i = 0; i < parts; ++i) {
2452     WordType Dst = dst[i];
2453     dst[i] -= src;
2454     if (src <= Dst)
2455       return 0; // No need to borrow so exit early.
2456     src = 1; // We have to "borrow 1" from next "word"
2457   }
2458 
2459   return 1;
2460 }
2461 
2462 /// Negate a bignum in-place.
2463 void APInt::tcNegate(WordType *dst, unsigned parts) {
2464   tcComplement(dst, parts);
2465   tcIncrement(dst, parts);
2466 }
2467 
2468 /// DST += SRC * MULTIPLIER + CARRY   if add is true
2469 /// DST  = SRC * MULTIPLIER + CARRY   if add is false
2470 /// Requires 0 <= DSTPARTS <= SRCPARTS + 1.  If DST overlaps SRC
2471 /// they must start at the same point, i.e. DST == SRC.
2472 /// If DSTPARTS == SRCPARTS + 1 no overflow occurs and zero is
2473 /// returned.  Otherwise DST is filled with the least significant
2474 /// DSTPARTS parts of the result, and if all of the omitted higher
2475 /// parts were zero return zero, otherwise overflow occurred and
2476 /// return one.
2477 int APInt::tcMultiplyPart(WordType *dst, const WordType *src,
2478                           WordType multiplier, WordType carry,
2479                           unsigned srcParts, unsigned dstParts,
2480                           bool add) {
2481   // Otherwise our writes of DST kill our later reads of SRC.
2482   assert(dst <= src || dst >= src + srcParts);
2483   assert(dstParts <= srcParts + 1);
2484 
2485   // N loops; minimum of dstParts and srcParts.
2486   unsigned n = std::min(dstParts, srcParts);
2487 
2488   for (unsigned i = 0; i < n; i++) {
2489     // [LOW, HIGH] = MULTIPLIER * SRC[i] + DST[i] + CARRY.
2490     // This cannot overflow, because:
2491     //   (n - 1) * (n - 1) + 2 (n - 1) = (n - 1) * (n + 1)
2492     // which is less than n^2.
2493     WordType srcPart = src[i];
2494     WordType low, mid, high;
2495     if (multiplier == 0 || srcPart == 0) {
2496       low = carry;
2497       high = 0;
2498     } else {
2499       low = lowHalf(srcPart) * lowHalf(multiplier);
2500       high = highHalf(srcPart) * highHalf(multiplier);
2501 
2502       mid = lowHalf(srcPart) * highHalf(multiplier);
2503       high += highHalf(mid);
2504       mid <<= APINT_BITS_PER_WORD / 2;
2505       if (low + mid < low)
2506         high++;
2507       low += mid;
2508 
2509       mid = highHalf(srcPart) * lowHalf(multiplier);
2510       high += highHalf(mid);
2511       mid <<= APINT_BITS_PER_WORD / 2;
2512       if (low + mid < low)
2513         high++;
2514       low += mid;
2515 
2516       // Now add carry.
2517       if (low + carry < low)
2518         high++;
2519       low += carry;
2520     }
2521 
2522     if (add) {
2523       // And now DST[i], and store the new low part there.
2524       if (low + dst[i] < low)
2525         high++;
2526       dst[i] += low;
2527     } else
2528       dst[i] = low;
2529 
2530     carry = high;
2531   }
2532 
2533   if (srcParts < dstParts) {
2534     // Full multiplication, there is no overflow.
2535     assert(srcParts + 1 == dstParts);
2536     dst[srcParts] = carry;
2537     return 0;
2538   }
2539 
2540   // We overflowed if there is carry.
2541   if (carry)
2542     return 1;
2543 
2544   // We would overflow if any significant unwritten parts would be
2545   // non-zero.  This is true if any remaining src parts are non-zero
2546   // and the multiplier is non-zero.
2547   if (multiplier)
2548     for (unsigned i = dstParts; i < srcParts; i++)
2549       if (src[i])
2550         return 1;
2551 
2552   // We fitted in the narrow destination.
2553   return 0;
2554 }
2555 
2556 /// DST = LHS * RHS, where DST has the same width as the operands and
2557 /// is filled with the least significant parts of the result.  Returns
2558 /// one if overflow occurred, otherwise zero.  DST must be disjoint
2559 /// from both operands.
2560 int APInt::tcMultiply(WordType *dst, const WordType *lhs,
2561                       const WordType *rhs, unsigned parts) {
2562   assert(dst != lhs && dst != rhs);
2563 
2564   int overflow = 0;
2565   tcSet(dst, 0, parts);
2566 
2567   for (unsigned i = 0; i < parts; i++)
2568     overflow |= tcMultiplyPart(&dst[i], lhs, rhs[i], 0, parts,
2569                                parts - i, true);
2570 
2571   return overflow;
2572 }
2573 
2574 /// DST = LHS * RHS, where DST has width the sum of the widths of the
2575 /// operands. No overflow occurs. DST must be disjoint from both operands.
2576 void APInt::tcFullMultiply(WordType *dst, const WordType *lhs,
2577                            const WordType *rhs, unsigned lhsParts,
2578                            unsigned rhsParts) {
2579   // Put the narrower number on the LHS for less loops below.
2580   if (lhsParts > rhsParts)
2581     return tcFullMultiply (dst, rhs, lhs, rhsParts, lhsParts);
2582 
2583   assert(dst != lhs && dst != rhs);
2584 
2585   tcSet(dst, 0, rhsParts);
2586 
2587   for (unsigned i = 0; i < lhsParts; i++)
2588     tcMultiplyPart(&dst[i], rhs, lhs[i], 0, rhsParts, rhsParts + 1, true);
2589 }
2590 
2591 // If RHS is zero LHS and REMAINDER are left unchanged, return one.
2592 // Otherwise set LHS to LHS / RHS with the fractional part discarded,
2593 // set REMAINDER to the remainder, return zero.  i.e.
2594 //
2595 //   OLD_LHS = RHS * LHS + REMAINDER
2596 //
2597 // SCRATCH is a bignum of the same size as the operands and result for
2598 // use by the routine; its contents need not be initialized and are
2599 // destroyed.  LHS, REMAINDER and SCRATCH must be distinct.
2600 int APInt::tcDivide(WordType *lhs, const WordType *rhs,
2601                     WordType *remainder, WordType *srhs,
2602                     unsigned parts) {
2603   assert(lhs != remainder && lhs != srhs && remainder != srhs);
2604 
2605   unsigned shiftCount = tcMSB(rhs, parts) + 1;
2606   if (shiftCount == 0)
2607     return true;
2608 
2609   shiftCount = parts * APINT_BITS_PER_WORD - shiftCount;
2610   unsigned n = shiftCount / APINT_BITS_PER_WORD;
2611   WordType mask = (WordType) 1 << (shiftCount % APINT_BITS_PER_WORD);
2612 
2613   tcAssign(srhs, rhs, parts);
2614   tcShiftLeft(srhs, parts, shiftCount);
2615   tcAssign(remainder, lhs, parts);
2616   tcSet(lhs, 0, parts);
2617 
2618   // Loop, subtracting SRHS if REMAINDER is greater and adding that to the
2619   // total.
2620   for (;;) {
2621     int compare = tcCompare(remainder, srhs, parts);
2622     if (compare >= 0) {
2623       tcSubtract(remainder, srhs, 0, parts);
2624       lhs[n] |= mask;
2625     }
2626 
2627     if (shiftCount == 0)
2628       break;
2629     shiftCount--;
2630     tcShiftRight(srhs, parts, 1);
2631     if ((mask >>= 1) == 0) {
2632       mask = (WordType) 1 << (APINT_BITS_PER_WORD - 1);
2633       n--;
2634     }
2635   }
2636 
2637   return false;
2638 }
2639 
2640 /// Shift a bignum left Cound bits in-place. Shifted in bits are zero. There are
2641 /// no restrictions on Count.
2642 void APInt::tcShiftLeft(WordType *Dst, unsigned Words, unsigned Count) {
2643   // Don't bother performing a no-op shift.
2644   if (!Count)
2645     return;
2646 
2647   // WordShift is the inter-part shift; BitShift is the intra-part shift.
2648   unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words);
2649   unsigned BitShift = Count % APINT_BITS_PER_WORD;
2650 
2651   // Fastpath for moving by whole words.
2652   if (BitShift == 0) {
2653     std::memmove(Dst + WordShift, Dst, (Words - WordShift) * APINT_WORD_SIZE);
2654   } else {
2655     while (Words-- > WordShift) {
2656       Dst[Words] = Dst[Words - WordShift] << BitShift;
2657       if (Words > WordShift)
2658         Dst[Words] |=
2659           Dst[Words - WordShift - 1] >> (APINT_BITS_PER_WORD - BitShift);
2660     }
2661   }
2662 
2663   // Fill in the remainder with 0s.
2664   std::memset(Dst, 0, WordShift * APINT_WORD_SIZE);
2665 }
2666 
2667 /// Shift a bignum right Count bits in-place. Shifted in bits are zero. There
2668 /// are no restrictions on Count.
2669 void APInt::tcShiftRight(WordType *Dst, unsigned Words, unsigned Count) {
2670   // Don't bother performing a no-op shift.
2671   if (!Count)
2672     return;
2673 
2674   // WordShift is the inter-part shift; BitShift is the intra-part shift.
2675   unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words);
2676   unsigned BitShift = Count % APINT_BITS_PER_WORD;
2677 
2678   unsigned WordsToMove = Words - WordShift;
2679   // Fastpath for moving by whole words.
2680   if (BitShift == 0) {
2681     std::memmove(Dst, Dst + WordShift, WordsToMove * APINT_WORD_SIZE);
2682   } else {
2683     for (unsigned i = 0; i != WordsToMove; ++i) {
2684       Dst[i] = Dst[i + WordShift] >> BitShift;
2685       if (i + 1 != WordsToMove)
2686         Dst[i] |= Dst[i + WordShift + 1] << (APINT_BITS_PER_WORD - BitShift);
2687     }
2688   }
2689 
2690   // Fill in the remainder with 0s.
2691   std::memset(Dst + WordsToMove, 0, WordShift * APINT_WORD_SIZE);
2692 }
2693 
2694 // Comparison (unsigned) of two bignums.
2695 int APInt::tcCompare(const WordType *lhs, const WordType *rhs,
2696                      unsigned parts) {
2697   while (parts) {
2698     parts--;
2699     if (lhs[parts] != rhs[parts])
2700       return (lhs[parts] > rhs[parts]) ? 1 : -1;
2701   }
2702 
2703   return 0;
2704 }
2705 
2706 APInt llvm::APIntOps::RoundingUDiv(const APInt &A, const APInt &B,
2707                                    APInt::Rounding RM) {
2708   // Currently udivrem always rounds down.
2709   switch (RM) {
2710   case APInt::Rounding::DOWN:
2711   case APInt::Rounding::TOWARD_ZERO:
2712     return A.udiv(B);
2713   case APInt::Rounding::UP: {
2714     APInt Quo, Rem;
2715     APInt::udivrem(A, B, Quo, Rem);
2716     if (Rem == 0)
2717       return Quo;
2718     return Quo + 1;
2719   }
2720   }
2721   llvm_unreachable("Unknown APInt::Rounding enum");
2722 }
2723 
2724 APInt llvm::APIntOps::RoundingSDiv(const APInt &A, const APInt &B,
2725                                    APInt::Rounding RM) {
2726   switch (RM) {
2727   case APInt::Rounding::DOWN:
2728   case APInt::Rounding::UP: {
2729     APInt Quo, Rem;
2730     APInt::sdivrem(A, B, Quo, Rem);
2731     if (Rem == 0)
2732       return Quo;
2733     // This algorithm deals with arbitrary rounding mode used by sdivrem.
2734     // We want to check whether the non-integer part of the mathematical value
2735     // is negative or not. If the non-integer part is negative, we need to round
2736     // down from Quo; otherwise, if it's positive or 0, we return Quo, as it's
2737     // already rounded down.
2738     if (RM == APInt::Rounding::DOWN) {
2739       if (Rem.isNegative() != B.isNegative())
2740         return Quo - 1;
2741       return Quo;
2742     }
2743     if (Rem.isNegative() != B.isNegative())
2744       return Quo;
2745     return Quo + 1;
2746   }
2747   // Currently sdiv rounds towards zero.
2748   case APInt::Rounding::TOWARD_ZERO:
2749     return A.sdiv(B);
2750   }
2751   llvm_unreachable("Unknown APInt::Rounding enum");
2752 }
2753 
2754 Optional<APInt>
2755 llvm::APIntOps::SolveQuadraticEquationWrap(APInt A, APInt B, APInt C,
2756                                            unsigned RangeWidth) {
2757   unsigned CoeffWidth = A.getBitWidth();
2758   assert(CoeffWidth == B.getBitWidth() && CoeffWidth == C.getBitWidth());
2759   assert(RangeWidth <= CoeffWidth &&
2760          "Value range width should be less than coefficient width");
2761   assert(RangeWidth > 1 && "Value range bit width should be > 1");
2762 
2763   LLVM_DEBUG(dbgs() << __func__ << ": solving " << A << "x^2 + " << B
2764                     << "x + " << C << ", rw:" << RangeWidth << '\n');
2765 
2766   // Identify 0 as a (non)solution immediately.
2767   if (C.sextOrTrunc(RangeWidth).isNullValue() ) {
2768     LLVM_DEBUG(dbgs() << __func__ << ": zero solution\n");
2769     return APInt(CoeffWidth, 0);
2770   }
2771 
2772   // The result of APInt arithmetic has the same bit width as the operands,
2773   // so it can actually lose high bits. A product of two n-bit integers needs
2774   // 2n-1 bits to represent the full value.
2775   // The operation done below (on quadratic coefficients) that can produce
2776   // the largest value is the evaluation of the equation during bisection,
2777   // which needs 3 times the bitwidth of the coefficient, so the total number
2778   // of required bits is 3n.
2779   //
2780   // The purpose of this extension is to simulate the set Z of all integers,
2781   // where n+1 > n for all n in Z. In Z it makes sense to talk about positive
2782   // and negative numbers (not so much in a modulo arithmetic). The method
2783   // used to solve the equation is based on the standard formula for real
2784   // numbers, and uses the concepts of "positive" and "negative" with their
2785   // usual meanings.
2786   CoeffWidth *= 3;
2787   A = A.sext(CoeffWidth);
2788   B = B.sext(CoeffWidth);
2789   C = C.sext(CoeffWidth);
2790 
2791   // Make A > 0 for simplicity. Negate cannot overflow at this point because
2792   // the bit width has increased.
2793   if (A.isNegative()) {
2794     A.negate();
2795     B.negate();
2796     C.negate();
2797   }
2798 
2799   // Solving an equation q(x) = 0 with coefficients in modular arithmetic
2800   // is really solving a set of equations q(x) = kR for k = 0, 1, 2, ...,
2801   // and R = 2^BitWidth.
2802   // Since we're trying not only to find exact solutions, but also values
2803   // that "wrap around", such a set will always have a solution, i.e. an x
2804   // that satisfies at least one of the equations, or such that |q(x)|
2805   // exceeds kR, while |q(x-1)| for the same k does not.
2806   //
2807   // We need to find a value k, such that Ax^2 + Bx + C = kR will have a
2808   // positive solution n (in the above sense), and also such that the n
2809   // will be the least among all solutions corresponding to k = 0, 1, ...
2810   // (more precisely, the least element in the set
2811   //   { n(k) | k is such that a solution n(k) exists }).
2812   //
2813   // Consider the parabola (over real numbers) that corresponds to the
2814   // quadratic equation. Since A > 0, the arms of the parabola will point
2815   // up. Picking different values of k will shift it up and down by R.
2816   //
2817   // We want to shift the parabola in such a way as to reduce the problem
2818   // of solving q(x) = kR to solving shifted_q(x) = 0.
2819   // (The interesting solutions are the ceilings of the real number
2820   // solutions.)
2821   APInt R = APInt::getOneBitSet(CoeffWidth, RangeWidth);
2822   APInt TwoA = 2 * A;
2823   APInt SqrB = B * B;
2824   bool PickLow;
2825 
2826   auto RoundUp = [] (const APInt &V, const APInt &A) -> APInt {
2827     assert(A.isStrictlyPositive());
2828     APInt T = V.abs().urem(A);
2829     if (T.isNullValue())
2830       return V;
2831     return V.isNegative() ? V+T : V+(A-T);
2832   };
2833 
2834   // The vertex of the parabola is at -B/2A, but since A > 0, it's negative
2835   // iff B is positive.
2836   if (B.isNonNegative()) {
2837     // If B >= 0, the vertex it at a negative location (or at 0), so in
2838     // order to have a non-negative solution we need to pick k that makes
2839     // C-kR negative. To satisfy all the requirements for the solution
2840     // that we are looking for, it needs to be closest to 0 of all k.
2841     C = C.srem(R);
2842     if (C.isStrictlyPositive())
2843       C -= R;
2844     // Pick the greater solution.
2845     PickLow = false;
2846   } else {
2847     // If B < 0, the vertex is at a positive location. For any solution
2848     // to exist, the discriminant must be non-negative. This means that
2849     // C-kR <= B^2/4A is a necessary condition for k, i.e. there is a
2850     // lower bound on values of k: kR >= C - B^2/4A.
2851     APInt LowkR = C - SqrB.udiv(2*TwoA); // udiv because all values > 0.
2852     // Round LowkR up (towards +inf) to the nearest kR.
2853     LowkR = RoundUp(LowkR, R);
2854 
2855     // If there exists k meeting the condition above, and such that
2856     // C-kR > 0, there will be two positive real number solutions of
2857     // q(x) = kR. Out of all such values of k, pick the one that makes
2858     // C-kR closest to 0, (i.e. pick maximum k such that C-kR > 0).
2859     // In other words, find maximum k such that LowkR <= kR < C.
2860     if (C.sgt(LowkR)) {
2861       // If LowkR < C, then such a k is guaranteed to exist because
2862       // LowkR itself is a multiple of R.
2863       C -= -RoundUp(-C, R);      // C = C - RoundDown(C, R)
2864       // Pick the smaller solution.
2865       PickLow = true;
2866     } else {
2867       // If C-kR < 0 for all potential k's, it means that one solution
2868       // will be negative, while the other will be positive. The positive
2869       // solution will shift towards 0 if the parabola is moved up.
2870       // Pick the kR closest to the lower bound (i.e. make C-kR closest
2871       // to 0, or in other words, out of all parabolas that have solutions,
2872       // pick the one that is the farthest "up").
2873       // Since LowkR is itself a multiple of R, simply take C-LowkR.
2874       C -= LowkR;
2875       // Pick the greater solution.
2876       PickLow = false;
2877     }
2878   }
2879 
2880   LLVM_DEBUG(dbgs() << __func__ << ": updated coefficients " << A << "x^2 + "
2881                     << B << "x + " << C << ", rw:" << RangeWidth << '\n');
2882 
2883   APInt D = SqrB - 4*A*C;
2884   assert(D.isNonNegative() && "Negative discriminant");
2885   APInt SQ = D.sqrt();
2886 
2887   APInt Q = SQ * SQ;
2888   bool InexactSQ = Q != D;
2889   // The calculated SQ may actually be greater than the exact (non-integer)
2890   // value. If that's the case, decrement SQ to get a value that is lower.
2891   if (Q.sgt(D))
2892     SQ -= 1;
2893 
2894   APInt X;
2895   APInt Rem;
2896 
2897   // SQ is rounded down (i.e SQ * SQ <= D), so the roots may be inexact.
2898   // When using the quadratic formula directly, the calculated low root
2899   // may be greater than the exact one, since we would be subtracting SQ.
2900   // To make sure that the calculated root is not greater than the exact
2901   // one, subtract SQ+1 when calculating the low root (for inexact value
2902   // of SQ).
2903   if (PickLow)
2904     APInt::sdivrem(-B - (SQ+InexactSQ), TwoA, X, Rem);
2905   else
2906     APInt::sdivrem(-B + SQ, TwoA, X, Rem);
2907 
2908   // The updated coefficients should be such that the (exact) solution is
2909   // positive. Since APInt division rounds towards 0, the calculated one
2910   // can be 0, but cannot be negative.
2911   assert(X.isNonNegative() && "Solution should be non-negative");
2912 
2913   if (!InexactSQ && Rem.isNullValue()) {
2914     LLVM_DEBUG(dbgs() << __func__ << ": solution (root): " << X << '\n');
2915     return X;
2916   }
2917 
2918   assert((SQ*SQ).sle(D) && "SQ = |_sqrt(D)_|, so SQ*SQ <= D");
2919   // The exact value of the square root of D should be between SQ and SQ+1.
2920   // This implies that the solution should be between that corresponding to
2921   // SQ (i.e. X) and that corresponding to SQ+1.
2922   //
2923   // The calculated X cannot be greater than the exact (real) solution.
2924   // Actually it must be strictly less than the exact solution, while
2925   // X+1 will be greater than or equal to it.
2926 
2927   APInt VX = (A*X + B)*X + C;
2928   APInt VY = VX + TwoA*X + A + B;
2929   bool SignChange = VX.isNegative() != VY.isNegative() ||
2930                     VX.isNullValue() != VY.isNullValue();
2931   // If the sign did not change between X and X+1, X is not a valid solution.
2932   // This could happen when the actual (exact) roots don't have an integer
2933   // between them, so they would both be contained between X and X+1.
2934   if (!SignChange) {
2935     LLVM_DEBUG(dbgs() << __func__ << ": no valid solution\n");
2936     return None;
2937   }
2938 
2939   X += 1;
2940   LLVM_DEBUG(dbgs() << __func__ << ": solution (wrap): " << X << '\n');
2941   return X;
2942 }
2943 
2944 Optional<unsigned>
2945 llvm::APIntOps::GetMostSignificantDifferentBit(const APInt &A, const APInt &B) {
2946   assert(A.getBitWidth() == B.getBitWidth() && "Must have the same bitwidth");
2947   if (A == B)
2948     return llvm::None;
2949   return A.getBitWidth() - ((A ^ B).countLeadingZeros() + 1);
2950 }
2951 
2952 /// StoreIntToMemory - Fills the StoreBytes bytes of memory starting from Dst
2953 /// with the integer held in IntVal.
2954 void llvm::StoreIntToMemory(const APInt &IntVal, uint8_t *Dst,
2955                             unsigned StoreBytes) {
2956   assert((IntVal.getBitWidth()+7)/8 >= StoreBytes && "Integer too small!");
2957   const uint8_t *Src = (const uint8_t *)IntVal.getRawData();
2958 
2959   if (sys::IsLittleEndianHost) {
2960     // Little-endian host - the source is ordered from LSB to MSB.  Order the
2961     // destination from LSB to MSB: Do a straight copy.
2962     memcpy(Dst, Src, StoreBytes);
2963   } else {
2964     // Big-endian host - the source is an array of 64 bit words ordered from
2965     // LSW to MSW.  Each word is ordered from MSB to LSB.  Order the destination
2966     // from MSB to LSB: Reverse the word order, but not the bytes in a word.
2967     while (StoreBytes > sizeof(uint64_t)) {
2968       StoreBytes -= sizeof(uint64_t);
2969       // May not be aligned so use memcpy.
2970       memcpy(Dst + StoreBytes, Src, sizeof(uint64_t));
2971       Src += sizeof(uint64_t);
2972     }
2973 
2974     memcpy(Dst, Src + sizeof(uint64_t) - StoreBytes, StoreBytes);
2975   }
2976 }
2977 
2978 /// LoadIntFromMemory - Loads the integer stored in the LoadBytes bytes starting
2979 /// from Src into IntVal, which is assumed to be wide enough and to hold zero.
2980 void llvm::LoadIntFromMemory(APInt &IntVal, const uint8_t *Src,
2981                              unsigned LoadBytes) {
2982   assert((IntVal.getBitWidth()+7)/8 >= LoadBytes && "Integer too small!");
2983   uint8_t *Dst = reinterpret_cast<uint8_t *>(
2984                    const_cast<uint64_t *>(IntVal.getRawData()));
2985 
2986   if (sys::IsLittleEndianHost)
2987     // Little-endian host - the destination must be ordered from LSB to MSB.
2988     // The source is ordered from LSB to MSB: Do a straight copy.
2989     memcpy(Dst, Src, LoadBytes);
2990   else {
2991     // Big-endian - the destination is an array of 64 bit words ordered from
2992     // LSW to MSW.  Each word must be ordered from MSB to LSB.  The source is
2993     // ordered from MSB to LSB: Reverse the word order, but not the bytes in
2994     // a word.
2995     while (LoadBytes > sizeof(uint64_t)) {
2996       LoadBytes -= sizeof(uint64_t);
2997       // May not be aligned so use memcpy.
2998       memcpy(Dst, Src + LoadBytes, sizeof(uint64_t));
2999       Dst += sizeof(uint64_t);
3000     }
3001 
3002     memcpy(Dst + sizeof(uint64_t) - LoadBytes, Src, LoadBytes);
3003   }
3004 }
3005