1 //===-- APInt.cpp - Implement APInt class ---------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a class to represent arbitrary precision integer
10 // constant values and provide a variety of arithmetic operations on them.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/ADT/APInt.h"
15 #include "llvm/ADT/ArrayRef.h"
16 #include "llvm/ADT/FoldingSet.h"
17 #include "llvm/ADT/Hashing.h"
18 #include "llvm/ADT/Optional.h"
19 #include "llvm/ADT/SmallString.h"
20 #include "llvm/ADT/StringRef.h"
21 #include "llvm/ADT/bit.h"
22 #include "llvm/Config/llvm-config.h"
23 #include "llvm/Support/Debug.h"
24 #include "llvm/Support/ErrorHandling.h"
25 #include "llvm/Support/MathExtras.h"
26 #include "llvm/Support/raw_ostream.h"
27 #include <climits>
28 #include <cmath>
29 #include <cstdlib>
30 #include <cstring>
31 using namespace llvm;
32 
33 #define DEBUG_TYPE "apint"
34 
35 /// A utility function for allocating memory, checking for allocation failures,
36 /// and ensuring the contents are zeroed.
37 inline static uint64_t* getClearedMemory(unsigned numWords) {
38   uint64_t *result = new uint64_t[numWords];
39   memset(result, 0, numWords * sizeof(uint64_t));
40   return result;
41 }
42 
43 /// A utility function for allocating memory and checking for allocation
44 /// failure.  The content is not zeroed.
45 inline static uint64_t* getMemory(unsigned numWords) {
46   return new uint64_t[numWords];
47 }
48 
49 /// A utility function that converts a character to a digit.
50 inline static unsigned getDigit(char cdigit, uint8_t radix) {
51   unsigned r;
52 
53   if (radix == 16 || radix == 36) {
54     r = cdigit - '0';
55     if (r <= 9)
56       return r;
57 
58     r = cdigit - 'A';
59     if (r <= radix - 11U)
60       return r + 10;
61 
62     r = cdigit - 'a';
63     if (r <= radix - 11U)
64       return r + 10;
65 
66     radix = 10;
67   }
68 
69   r = cdigit - '0';
70   if (r < radix)
71     return r;
72 
73   return -1U;
74 }
75 
76 
77 void APInt::initSlowCase(uint64_t val, bool isSigned) {
78   U.pVal = getClearedMemory(getNumWords());
79   U.pVal[0] = val;
80   if (isSigned && int64_t(val) < 0)
81     for (unsigned i = 1; i < getNumWords(); ++i)
82       U.pVal[i] = WORDTYPE_MAX;
83   clearUnusedBits();
84 }
85 
86 void APInt::initSlowCase(const APInt& that) {
87   U.pVal = getMemory(getNumWords());
88   memcpy(U.pVal, that.U.pVal, getNumWords() * APINT_WORD_SIZE);
89 }
90 
91 void APInt::initFromArray(ArrayRef<uint64_t> bigVal) {
92   assert(BitWidth && "Bitwidth too small");
93   assert(bigVal.data() && "Null pointer detected!");
94   if (isSingleWord())
95     U.VAL = bigVal[0];
96   else {
97     // Get memory, cleared to 0
98     U.pVal = getClearedMemory(getNumWords());
99     // Calculate the number of words to copy
100     unsigned words = std::min<unsigned>(bigVal.size(), getNumWords());
101     // Copy the words from bigVal to pVal
102     memcpy(U.pVal, bigVal.data(), words * APINT_WORD_SIZE);
103   }
104   // Make sure unused high bits are cleared
105   clearUnusedBits();
106 }
107 
108 APInt::APInt(unsigned numBits, ArrayRef<uint64_t> bigVal)
109   : BitWidth(numBits) {
110   initFromArray(bigVal);
111 }
112 
113 APInt::APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[])
114   : BitWidth(numBits) {
115   initFromArray(makeArrayRef(bigVal, numWords));
116 }
117 
118 APInt::APInt(unsigned numbits, StringRef Str, uint8_t radix)
119   : BitWidth(numbits) {
120   assert(BitWidth && "Bitwidth too small");
121   fromString(numbits, Str, radix);
122 }
123 
124 void APInt::reallocate(unsigned NewBitWidth) {
125   // If the number of words is the same we can just change the width and stop.
126   if (getNumWords() == getNumWords(NewBitWidth)) {
127     BitWidth = NewBitWidth;
128     return;
129   }
130 
131   // If we have an allocation, delete it.
132   if (!isSingleWord())
133     delete [] U.pVal;
134 
135   // Update BitWidth.
136   BitWidth = NewBitWidth;
137 
138   // If we are supposed to have an allocation, create it.
139   if (!isSingleWord())
140     U.pVal = getMemory(getNumWords());
141 }
142 
143 void APInt::AssignSlowCase(const APInt& RHS) {
144   // Don't do anything for X = X
145   if (this == &RHS)
146     return;
147 
148   // Adjust the bit width and handle allocations as necessary.
149   reallocate(RHS.getBitWidth());
150 
151   // Copy the data.
152   if (isSingleWord())
153     U.VAL = RHS.U.VAL;
154   else
155     memcpy(U.pVal, RHS.U.pVal, getNumWords() * APINT_WORD_SIZE);
156 }
157 
158 /// This method 'profiles' an APInt for use with FoldingSet.
159 void APInt::Profile(FoldingSetNodeID& ID) const {
160   ID.AddInteger(BitWidth);
161 
162   if (isSingleWord()) {
163     ID.AddInteger(U.VAL);
164     return;
165   }
166 
167   unsigned NumWords = getNumWords();
168   for (unsigned i = 0; i < NumWords; ++i)
169     ID.AddInteger(U.pVal[i]);
170 }
171 
172 /// Prefix increment operator. Increments the APInt by one.
173 APInt& APInt::operator++() {
174   if (isSingleWord())
175     ++U.VAL;
176   else
177     tcIncrement(U.pVal, getNumWords());
178   return clearUnusedBits();
179 }
180 
181 /// Prefix decrement operator. Decrements the APInt by one.
182 APInt& APInt::operator--() {
183   if (isSingleWord())
184     --U.VAL;
185   else
186     tcDecrement(U.pVal, getNumWords());
187   return clearUnusedBits();
188 }
189 
190 /// Adds the RHS APInt to this APInt.
191 /// @returns this, after addition of RHS.
192 /// Addition assignment operator.
193 APInt& APInt::operator+=(const APInt& RHS) {
194   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
195   if (isSingleWord())
196     U.VAL += RHS.U.VAL;
197   else
198     tcAdd(U.pVal, RHS.U.pVal, 0, getNumWords());
199   return clearUnusedBits();
200 }
201 
202 APInt& APInt::operator+=(uint64_t RHS) {
203   if (isSingleWord())
204     U.VAL += RHS;
205   else
206     tcAddPart(U.pVal, RHS, getNumWords());
207   return clearUnusedBits();
208 }
209 
210 /// Subtracts the RHS APInt from this APInt
211 /// @returns this, after subtraction
212 /// Subtraction assignment operator.
213 APInt& APInt::operator-=(const APInt& RHS) {
214   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
215   if (isSingleWord())
216     U.VAL -= RHS.U.VAL;
217   else
218     tcSubtract(U.pVal, RHS.U.pVal, 0, getNumWords());
219   return clearUnusedBits();
220 }
221 
222 APInt& APInt::operator-=(uint64_t RHS) {
223   if (isSingleWord())
224     U.VAL -= RHS;
225   else
226     tcSubtractPart(U.pVal, RHS, getNumWords());
227   return clearUnusedBits();
228 }
229 
230 APInt APInt::operator*(const APInt& RHS) const {
231   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
232   if (isSingleWord())
233     return APInt(BitWidth, U.VAL * RHS.U.VAL);
234 
235   APInt Result(getMemory(getNumWords()), getBitWidth());
236 
237   tcMultiply(Result.U.pVal, U.pVal, RHS.U.pVal, getNumWords());
238 
239   Result.clearUnusedBits();
240   return Result;
241 }
242 
243 void APInt::AndAssignSlowCase(const APInt& RHS) {
244   tcAnd(U.pVal, RHS.U.pVal, getNumWords());
245 }
246 
247 void APInt::OrAssignSlowCase(const APInt& RHS) {
248   tcOr(U.pVal, RHS.U.pVal, getNumWords());
249 }
250 
251 void APInt::XorAssignSlowCase(const APInt& RHS) {
252   tcXor(U.pVal, RHS.U.pVal, getNumWords());
253 }
254 
255 APInt& APInt::operator*=(const APInt& RHS) {
256   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
257   *this = *this * RHS;
258   return *this;
259 }
260 
261 APInt& APInt::operator*=(uint64_t RHS) {
262   if (isSingleWord()) {
263     U.VAL *= RHS;
264   } else {
265     unsigned NumWords = getNumWords();
266     tcMultiplyPart(U.pVal, U.pVal, RHS, 0, NumWords, NumWords, false);
267   }
268   return clearUnusedBits();
269 }
270 
271 bool APInt::EqualSlowCase(const APInt& RHS) const {
272   return std::equal(U.pVal, U.pVal + getNumWords(), RHS.U.pVal);
273 }
274 
275 int APInt::compare(const APInt& RHS) const {
276   assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
277   if (isSingleWord())
278     return U.VAL < RHS.U.VAL ? -1 : U.VAL > RHS.U.VAL;
279 
280   return tcCompare(U.pVal, RHS.U.pVal, getNumWords());
281 }
282 
283 int APInt::compareSigned(const APInt& RHS) const {
284   assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
285   if (isSingleWord()) {
286     int64_t lhsSext = SignExtend64(U.VAL, BitWidth);
287     int64_t rhsSext = SignExtend64(RHS.U.VAL, BitWidth);
288     return lhsSext < rhsSext ? -1 : lhsSext > rhsSext;
289   }
290 
291   bool lhsNeg = isNegative();
292   bool rhsNeg = RHS.isNegative();
293 
294   // If the sign bits don't match, then (LHS < RHS) if LHS is negative
295   if (lhsNeg != rhsNeg)
296     return lhsNeg ? -1 : 1;
297 
298   // Otherwise we can just use an unsigned comparison, because even negative
299   // numbers compare correctly this way if both have the same signed-ness.
300   return tcCompare(U.pVal, RHS.U.pVal, getNumWords());
301 }
302 
303 void APInt::setBitsSlowCase(unsigned loBit, unsigned hiBit) {
304   unsigned loWord = whichWord(loBit);
305   unsigned hiWord = whichWord(hiBit);
306 
307   // Create an initial mask for the low word with zeros below loBit.
308   uint64_t loMask = WORDTYPE_MAX << whichBit(loBit);
309 
310   // If hiBit is not aligned, we need a high mask.
311   unsigned hiShiftAmt = whichBit(hiBit);
312   if (hiShiftAmt != 0) {
313     // Create a high mask with zeros above hiBit.
314     uint64_t hiMask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - hiShiftAmt);
315     // If loWord and hiWord are equal, then we combine the masks. Otherwise,
316     // set the bits in hiWord.
317     if (hiWord == loWord)
318       loMask &= hiMask;
319     else
320       U.pVal[hiWord] |= hiMask;
321   }
322   // Apply the mask to the low word.
323   U.pVal[loWord] |= loMask;
324 
325   // Fill any words between loWord and hiWord with all ones.
326   for (unsigned word = loWord + 1; word < hiWord; ++word)
327     U.pVal[word] = WORDTYPE_MAX;
328 }
329 
330 /// Toggle every bit to its opposite value.
331 void APInt::flipAllBitsSlowCase() {
332   tcComplement(U.pVal, getNumWords());
333   clearUnusedBits();
334 }
335 
336 /// Toggle a given bit to its opposite value whose position is given
337 /// as "bitPosition".
338 /// Toggles a given bit to its opposite value.
339 void APInt::flipBit(unsigned bitPosition) {
340   assert(bitPosition < BitWidth && "Out of the bit-width range!");
341   setBitVal(bitPosition, !(*this)[bitPosition]);
342 }
343 
344 void APInt::insertBits(const APInt &subBits, unsigned bitPosition) {
345   unsigned subBitWidth = subBits.getBitWidth();
346   assert(0 < subBitWidth && (subBitWidth + bitPosition) <= BitWidth &&
347          "Illegal bit insertion");
348 
349   // Insertion is a direct copy.
350   if (subBitWidth == BitWidth) {
351     *this = subBits;
352     return;
353   }
354 
355   // Single word result can be done as a direct bitmask.
356   if (isSingleWord()) {
357     uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - subBitWidth);
358     U.VAL &= ~(mask << bitPosition);
359     U.VAL |= (subBits.U.VAL << bitPosition);
360     return;
361   }
362 
363   unsigned loBit = whichBit(bitPosition);
364   unsigned loWord = whichWord(bitPosition);
365   unsigned hi1Word = whichWord(bitPosition + subBitWidth - 1);
366 
367   // Insertion within a single word can be done as a direct bitmask.
368   if (loWord == hi1Word) {
369     uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - subBitWidth);
370     U.pVal[loWord] &= ~(mask << loBit);
371     U.pVal[loWord] |= (subBits.U.VAL << loBit);
372     return;
373   }
374 
375   // Insert on word boundaries.
376   if (loBit == 0) {
377     // Direct copy whole words.
378     unsigned numWholeSubWords = subBitWidth / APINT_BITS_PER_WORD;
379     memcpy(U.pVal + loWord, subBits.getRawData(),
380            numWholeSubWords * APINT_WORD_SIZE);
381 
382     // Mask+insert remaining bits.
383     unsigned remainingBits = subBitWidth % APINT_BITS_PER_WORD;
384     if (remainingBits != 0) {
385       uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - remainingBits);
386       U.pVal[hi1Word] &= ~mask;
387       U.pVal[hi1Word] |= subBits.getWord(subBitWidth - 1);
388     }
389     return;
390   }
391 
392   // General case - set/clear individual bits in dst based on src.
393   // TODO - there is scope for optimization here, but at the moment this code
394   // path is barely used so prefer readability over performance.
395   for (unsigned i = 0; i != subBitWidth; ++i)
396     setBitVal(bitPosition + i, subBits[i]);
397 }
398 
399 void APInt::insertBits(uint64_t subBits, unsigned bitPosition, unsigned numBits) {
400   uint64_t maskBits = maskTrailingOnes<uint64_t>(numBits);
401   subBits &= maskBits;
402   if (isSingleWord()) {
403     U.VAL &= ~(maskBits << bitPosition);
404     U.VAL |= subBits << bitPosition;
405     return;
406   }
407 
408   unsigned loBit = whichBit(bitPosition);
409   unsigned loWord = whichWord(bitPosition);
410   unsigned hiWord = whichWord(bitPosition + numBits - 1);
411   if (loWord == hiWord) {
412     U.pVal[loWord] &= ~(maskBits << loBit);
413     U.pVal[loWord] |= subBits << loBit;
414     return;
415   }
416 
417   static_assert(8 * sizeof(WordType) <= 64, "This code assumes only two words affected");
418   unsigned wordBits = 8 * sizeof(WordType);
419   U.pVal[loWord] &= ~(maskBits << loBit);
420   U.pVal[loWord] |= subBits << loBit;
421 
422   U.pVal[hiWord] &= ~(maskBits >> (wordBits - loBit));
423   U.pVal[hiWord] |= subBits >> (wordBits - loBit);
424 }
425 
426 APInt APInt::extractBits(unsigned numBits, unsigned bitPosition) const {
427   assert(numBits > 0 && "Can't extract zero bits");
428   assert(bitPosition < BitWidth && (numBits + bitPosition) <= BitWidth &&
429          "Illegal bit extraction");
430 
431   if (isSingleWord())
432     return APInt(numBits, U.VAL >> bitPosition);
433 
434   unsigned loBit = whichBit(bitPosition);
435   unsigned loWord = whichWord(bitPosition);
436   unsigned hiWord = whichWord(bitPosition + numBits - 1);
437 
438   // Single word result extracting bits from a single word source.
439   if (loWord == hiWord)
440     return APInt(numBits, U.pVal[loWord] >> loBit);
441 
442   // Extracting bits that start on a source word boundary can be done
443   // as a fast memory copy.
444   if (loBit == 0)
445     return APInt(numBits, makeArrayRef(U.pVal + loWord, 1 + hiWord - loWord));
446 
447   // General case - shift + copy source words directly into place.
448   APInt Result(numBits, 0);
449   unsigned NumSrcWords = getNumWords();
450   unsigned NumDstWords = Result.getNumWords();
451 
452   uint64_t *DestPtr = Result.isSingleWord() ? &Result.U.VAL : Result.U.pVal;
453   for (unsigned word = 0; word < NumDstWords; ++word) {
454     uint64_t w0 = U.pVal[loWord + word];
455     uint64_t w1 =
456         (loWord + word + 1) < NumSrcWords ? U.pVal[loWord + word + 1] : 0;
457     DestPtr[word] = (w0 >> loBit) | (w1 << (APINT_BITS_PER_WORD - loBit));
458   }
459 
460   return Result.clearUnusedBits();
461 }
462 
463 uint64_t APInt::extractBitsAsZExtValue(unsigned numBits,
464                                        unsigned bitPosition) const {
465   assert(numBits > 0 && "Can't extract zero bits");
466   assert(bitPosition < BitWidth && (numBits + bitPosition) <= BitWidth &&
467          "Illegal bit extraction");
468   assert(numBits <= 64 && "Illegal bit extraction");
469 
470   uint64_t maskBits = maskTrailingOnes<uint64_t>(numBits);
471   if (isSingleWord())
472     return (U.VAL >> bitPosition) & maskBits;
473 
474   unsigned loBit = whichBit(bitPosition);
475   unsigned loWord = whichWord(bitPosition);
476   unsigned hiWord = whichWord(bitPosition + numBits - 1);
477   if (loWord == hiWord)
478     return (U.pVal[loWord] >> loBit) & maskBits;
479 
480   static_assert(8 * sizeof(WordType) <= 64, "This code assumes only two words affected");
481   unsigned wordBits = 8 * sizeof(WordType);
482   uint64_t retBits = U.pVal[loWord] >> loBit;
483   retBits |= U.pVal[hiWord] << (wordBits - loBit);
484   retBits &= maskBits;
485   return retBits;
486 }
487 
488 unsigned APInt::getBitsNeeded(StringRef str, uint8_t radix) {
489   assert(!str.empty() && "Invalid string length");
490   assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 ||
491           radix == 36) &&
492          "Radix should be 2, 8, 10, 16, or 36!");
493 
494   size_t slen = str.size();
495 
496   // Each computation below needs to know if it's negative.
497   StringRef::iterator p = str.begin();
498   unsigned isNegative = *p == '-';
499   if (*p == '-' || *p == '+') {
500     p++;
501     slen--;
502     assert(slen && "String is only a sign, needs a value.");
503   }
504 
505   // For radixes of power-of-two values, the bits required is accurately and
506   // easily computed
507   if (radix == 2)
508     return slen + isNegative;
509   if (radix == 8)
510     return slen * 3 + isNegative;
511   if (radix == 16)
512     return slen * 4 + isNegative;
513 
514   // FIXME: base 36
515 
516   // This is grossly inefficient but accurate. We could probably do something
517   // with a computation of roughly slen*64/20 and then adjust by the value of
518   // the first few digits. But, I'm not sure how accurate that could be.
519 
520   // Compute a sufficient number of bits that is always large enough but might
521   // be too large. This avoids the assertion in the constructor. This
522   // calculation doesn't work appropriately for the numbers 0-9, so just use 4
523   // bits in that case.
524   unsigned sufficient
525     = radix == 10? (slen == 1 ? 4 : slen * 64/18)
526                  : (slen == 1 ? 7 : slen * 16/3);
527 
528   // Convert to the actual binary value.
529   APInt tmp(sufficient, StringRef(p, slen), radix);
530 
531   // Compute how many bits are required. If the log is infinite, assume we need
532   // just bit. If the log is exact and value is negative, then the value is
533   // MinSignedValue with (log + 1) bits.
534   unsigned log = tmp.logBase2();
535   if (log == (unsigned)-1) {
536     return isNegative + 1;
537   } else if (isNegative && tmp.isPowerOf2()) {
538     return isNegative + log;
539   } else {
540     return isNegative + log + 1;
541   }
542 }
543 
544 hash_code llvm::hash_value(const APInt &Arg) {
545   if (Arg.isSingleWord())
546     return hash_combine(Arg.BitWidth, Arg.U.VAL);
547 
548   return hash_combine(
549       Arg.BitWidth,
550       hash_combine_range(Arg.U.pVal, Arg.U.pVal + Arg.getNumWords()));
551 }
552 
553 bool APInt::isSplat(unsigned SplatSizeInBits) const {
554   assert(getBitWidth() % SplatSizeInBits == 0 &&
555          "SplatSizeInBits must divide width!");
556   // We can check that all parts of an integer are equal by making use of a
557   // little trick: rotate and check if it's still the same value.
558   return *this == rotl(SplatSizeInBits);
559 }
560 
561 /// This function returns the high "numBits" bits of this APInt.
562 APInt APInt::getHiBits(unsigned numBits) const {
563   return this->lshr(BitWidth - numBits);
564 }
565 
566 /// This function returns the low "numBits" bits of this APInt.
567 APInt APInt::getLoBits(unsigned numBits) const {
568   APInt Result(getLowBitsSet(BitWidth, numBits));
569   Result &= *this;
570   return Result;
571 }
572 
573 /// Return a value containing V broadcasted over NewLen bits.
574 APInt APInt::getSplat(unsigned NewLen, const APInt &V) {
575   assert(NewLen >= V.getBitWidth() && "Can't splat to smaller bit width!");
576 
577   APInt Val = V.zextOrSelf(NewLen);
578   for (unsigned I = V.getBitWidth(); I < NewLen; I <<= 1)
579     Val |= Val << I;
580 
581   return Val;
582 }
583 
584 unsigned APInt::countLeadingZerosSlowCase() const {
585   unsigned Count = 0;
586   for (int i = getNumWords()-1; i >= 0; --i) {
587     uint64_t V = U.pVal[i];
588     if (V == 0)
589       Count += APINT_BITS_PER_WORD;
590     else {
591       Count += llvm::countLeadingZeros(V);
592       break;
593     }
594   }
595   // Adjust for unused bits in the most significant word (they are zero).
596   unsigned Mod = BitWidth % APINT_BITS_PER_WORD;
597   Count -= Mod > 0 ? APINT_BITS_PER_WORD - Mod : 0;
598   return Count;
599 }
600 
601 unsigned APInt::countLeadingOnesSlowCase() const {
602   unsigned highWordBits = BitWidth % APINT_BITS_PER_WORD;
603   unsigned shift;
604   if (!highWordBits) {
605     highWordBits = APINT_BITS_PER_WORD;
606     shift = 0;
607   } else {
608     shift = APINT_BITS_PER_WORD - highWordBits;
609   }
610   int i = getNumWords() - 1;
611   unsigned Count = llvm::countLeadingOnes(U.pVal[i] << shift);
612   if (Count == highWordBits) {
613     for (i--; i >= 0; --i) {
614       if (U.pVal[i] == WORDTYPE_MAX)
615         Count += APINT_BITS_PER_WORD;
616       else {
617         Count += llvm::countLeadingOnes(U.pVal[i]);
618         break;
619       }
620     }
621   }
622   return Count;
623 }
624 
625 unsigned APInt::countTrailingZerosSlowCase() const {
626   unsigned Count = 0;
627   unsigned i = 0;
628   for (; i < getNumWords() && U.pVal[i] == 0; ++i)
629     Count += APINT_BITS_PER_WORD;
630   if (i < getNumWords())
631     Count += llvm::countTrailingZeros(U.pVal[i]);
632   return std::min(Count, BitWidth);
633 }
634 
635 unsigned APInt::countTrailingOnesSlowCase() const {
636   unsigned Count = 0;
637   unsigned i = 0;
638   for (; i < getNumWords() && U.pVal[i] == WORDTYPE_MAX; ++i)
639     Count += APINT_BITS_PER_WORD;
640   if (i < getNumWords())
641     Count += llvm::countTrailingOnes(U.pVal[i]);
642   assert(Count <= BitWidth);
643   return Count;
644 }
645 
646 unsigned APInt::countPopulationSlowCase() const {
647   unsigned Count = 0;
648   for (unsigned i = 0; i < getNumWords(); ++i)
649     Count += llvm::countPopulation(U.pVal[i]);
650   return Count;
651 }
652 
653 bool APInt::intersectsSlowCase(const APInt &RHS) const {
654   for (unsigned i = 0, e = getNumWords(); i != e; ++i)
655     if ((U.pVal[i] & RHS.U.pVal[i]) != 0)
656       return true;
657 
658   return false;
659 }
660 
661 bool APInt::isSubsetOfSlowCase(const APInt &RHS) const {
662   for (unsigned i = 0, e = getNumWords(); i != e; ++i)
663     if ((U.pVal[i] & ~RHS.U.pVal[i]) != 0)
664       return false;
665 
666   return true;
667 }
668 
669 APInt APInt::byteSwap() const {
670   assert(BitWidth >= 16 && BitWidth % 8 == 0 && "Cannot byteswap!");
671   if (BitWidth == 16)
672     return APInt(BitWidth, ByteSwap_16(uint16_t(U.VAL)));
673   if (BitWidth == 32)
674     return APInt(BitWidth, ByteSwap_32(unsigned(U.VAL)));
675   if (BitWidth <= 64) {
676     uint64_t Tmp1 = ByteSwap_64(U.VAL);
677     Tmp1 >>= (64 - BitWidth);
678     return APInt(BitWidth, Tmp1);
679   }
680 
681   APInt Result(getNumWords() * APINT_BITS_PER_WORD, 0);
682   for (unsigned I = 0, N = getNumWords(); I != N; ++I)
683     Result.U.pVal[I] = ByteSwap_64(U.pVal[N - I - 1]);
684   if (Result.BitWidth != BitWidth) {
685     Result.lshrInPlace(Result.BitWidth - BitWidth);
686     Result.BitWidth = BitWidth;
687   }
688   return Result;
689 }
690 
691 APInt APInt::reverseBits() const {
692   switch (BitWidth) {
693   case 64:
694     return APInt(BitWidth, llvm::reverseBits<uint64_t>(U.VAL));
695   case 32:
696     return APInt(BitWidth, llvm::reverseBits<uint32_t>(U.VAL));
697   case 16:
698     return APInt(BitWidth, llvm::reverseBits<uint16_t>(U.VAL));
699   case 8:
700     return APInt(BitWidth, llvm::reverseBits<uint8_t>(U.VAL));
701   default:
702     break;
703   }
704 
705   APInt Val(*this);
706   APInt Reversed(BitWidth, 0);
707   unsigned S = BitWidth;
708 
709   for (; Val != 0; Val.lshrInPlace(1)) {
710     Reversed <<= 1;
711     Reversed |= Val[0];
712     --S;
713   }
714 
715   Reversed <<= S;
716   return Reversed;
717 }
718 
719 APInt llvm::APIntOps::GreatestCommonDivisor(APInt A, APInt B) {
720   // Fast-path a common case.
721   if (A == B) return A;
722 
723   // Corner cases: if either operand is zero, the other is the gcd.
724   if (!A) return B;
725   if (!B) return A;
726 
727   // Count common powers of 2 and remove all other powers of 2.
728   unsigned Pow2;
729   {
730     unsigned Pow2_A = A.countTrailingZeros();
731     unsigned Pow2_B = B.countTrailingZeros();
732     if (Pow2_A > Pow2_B) {
733       A.lshrInPlace(Pow2_A - Pow2_B);
734       Pow2 = Pow2_B;
735     } else if (Pow2_B > Pow2_A) {
736       B.lshrInPlace(Pow2_B - Pow2_A);
737       Pow2 = Pow2_A;
738     } else {
739       Pow2 = Pow2_A;
740     }
741   }
742 
743   // Both operands are odd multiples of 2^Pow_2:
744   //
745   //   gcd(a, b) = gcd(|a - b| / 2^i, min(a, b))
746   //
747   // This is a modified version of Stein's algorithm, taking advantage of
748   // efficient countTrailingZeros().
749   while (A != B) {
750     if (A.ugt(B)) {
751       A -= B;
752       A.lshrInPlace(A.countTrailingZeros() - Pow2);
753     } else {
754       B -= A;
755       B.lshrInPlace(B.countTrailingZeros() - Pow2);
756     }
757   }
758 
759   return A;
760 }
761 
762 APInt llvm::APIntOps::RoundDoubleToAPInt(double Double, unsigned width) {
763   uint64_t I = bit_cast<uint64_t>(Double);
764 
765   // Get the sign bit from the highest order bit
766   bool isNeg = I >> 63;
767 
768   // Get the 11-bit exponent and adjust for the 1023 bit bias
769   int64_t exp = ((I >> 52) & 0x7ff) - 1023;
770 
771   // If the exponent is negative, the value is < 0 so just return 0.
772   if (exp < 0)
773     return APInt(width, 0u);
774 
775   // Extract the mantissa by clearing the top 12 bits (sign + exponent).
776   uint64_t mantissa = (I & (~0ULL >> 12)) | 1ULL << 52;
777 
778   // If the exponent doesn't shift all bits out of the mantissa
779   if (exp < 52)
780     return isNeg ? -APInt(width, mantissa >> (52 - exp)) :
781                     APInt(width, mantissa >> (52 - exp));
782 
783   // If the client didn't provide enough bits for us to shift the mantissa into
784   // then the result is undefined, just return 0
785   if (width <= exp - 52)
786     return APInt(width, 0);
787 
788   // Otherwise, we have to shift the mantissa bits up to the right location
789   APInt Tmp(width, mantissa);
790   Tmp <<= (unsigned)exp - 52;
791   return isNeg ? -Tmp : Tmp;
792 }
793 
794 /// This function converts this APInt to a double.
795 /// The layout for double is as following (IEEE Standard 754):
796 ///  --------------------------------------
797 /// |  Sign    Exponent    Fraction    Bias |
798 /// |-------------------------------------- |
799 /// |  1[63]   11[62-52]   52[51-00]   1023 |
800 ///  --------------------------------------
801 double APInt::roundToDouble(bool isSigned) const {
802 
803   // Handle the simple case where the value is contained in one uint64_t.
804   // It is wrong to optimize getWord(0) to VAL; there might be more than one word.
805   if (isSingleWord() || getActiveBits() <= APINT_BITS_PER_WORD) {
806     if (isSigned) {
807       int64_t sext = SignExtend64(getWord(0), BitWidth);
808       return double(sext);
809     } else
810       return double(getWord(0));
811   }
812 
813   // Determine if the value is negative.
814   bool isNeg = isSigned ? (*this)[BitWidth-1] : false;
815 
816   // Construct the absolute value if we're negative.
817   APInt Tmp(isNeg ? -(*this) : (*this));
818 
819   // Figure out how many bits we're using.
820   unsigned n = Tmp.getActiveBits();
821 
822   // The exponent (without bias normalization) is just the number of bits
823   // we are using. Note that the sign bit is gone since we constructed the
824   // absolute value.
825   uint64_t exp = n;
826 
827   // Return infinity for exponent overflow
828   if (exp > 1023) {
829     if (!isSigned || !isNeg)
830       return std::numeric_limits<double>::infinity();
831     else
832       return -std::numeric_limits<double>::infinity();
833   }
834   exp += 1023; // Increment for 1023 bias
835 
836   // Number of bits in mantissa is 52. To obtain the mantissa value, we must
837   // extract the high 52 bits from the correct words in pVal.
838   uint64_t mantissa;
839   unsigned hiWord = whichWord(n-1);
840   if (hiWord == 0) {
841     mantissa = Tmp.U.pVal[0];
842     if (n > 52)
843       mantissa >>= n - 52; // shift down, we want the top 52 bits.
844   } else {
845     assert(hiWord > 0 && "huh?");
846     uint64_t hibits = Tmp.U.pVal[hiWord] << (52 - n % APINT_BITS_PER_WORD);
847     uint64_t lobits = Tmp.U.pVal[hiWord-1] >> (11 + n % APINT_BITS_PER_WORD);
848     mantissa = hibits | lobits;
849   }
850 
851   // The leading bit of mantissa is implicit, so get rid of it.
852   uint64_t sign = isNeg ? (1ULL << (APINT_BITS_PER_WORD - 1)) : 0;
853   uint64_t I = sign | (exp << 52) | mantissa;
854   return bit_cast<double>(I);
855 }
856 
857 // Truncate to new width.
858 APInt APInt::trunc(unsigned width) const {
859   assert(width < BitWidth && "Invalid APInt Truncate request");
860   assert(width && "Can't truncate to 0 bits");
861 
862   if (width <= APINT_BITS_PER_WORD)
863     return APInt(width, getRawData()[0]);
864 
865   APInt Result(getMemory(getNumWords(width)), width);
866 
867   // Copy full words.
868   unsigned i;
869   for (i = 0; i != width / APINT_BITS_PER_WORD; i++)
870     Result.U.pVal[i] = U.pVal[i];
871 
872   // Truncate and copy any partial word.
873   unsigned bits = (0 - width) % APINT_BITS_PER_WORD;
874   if (bits != 0)
875     Result.U.pVal[i] = U.pVal[i] << bits >> bits;
876 
877   return Result;
878 }
879 
880 // Truncate to new width with unsigned saturation.
881 APInt APInt::truncUSat(unsigned width) const {
882   assert(width < BitWidth && "Invalid APInt Truncate request");
883   assert(width && "Can't truncate to 0 bits");
884 
885   // Can we just losslessly truncate it?
886   if (isIntN(width))
887     return trunc(width);
888   // If not, then just return the new limit.
889   return APInt::getMaxValue(width);
890 }
891 
892 // Truncate to new width with signed saturation.
893 APInt APInt::truncSSat(unsigned width) const {
894   assert(width < BitWidth && "Invalid APInt Truncate request");
895   assert(width && "Can't truncate to 0 bits");
896 
897   // Can we just losslessly truncate it?
898   if (isSignedIntN(width))
899     return trunc(width);
900   // If not, then just return the new limits.
901   return isNegative() ? APInt::getSignedMinValue(width)
902                       : APInt::getSignedMaxValue(width);
903 }
904 
905 // Sign extend to a new width.
906 APInt APInt::sext(unsigned Width) const {
907   assert(Width > BitWidth && "Invalid APInt SignExtend request");
908 
909   if (Width <= APINT_BITS_PER_WORD)
910     return APInt(Width, SignExtend64(U.VAL, BitWidth));
911 
912   APInt Result(getMemory(getNumWords(Width)), Width);
913 
914   // Copy words.
915   std::memcpy(Result.U.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE);
916 
917   // Sign extend the last word since there may be unused bits in the input.
918   Result.U.pVal[getNumWords() - 1] =
919       SignExtend64(Result.U.pVal[getNumWords() - 1],
920                    ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1);
921 
922   // Fill with sign bits.
923   std::memset(Result.U.pVal + getNumWords(), isNegative() ? -1 : 0,
924               (Result.getNumWords() - getNumWords()) * APINT_WORD_SIZE);
925   Result.clearUnusedBits();
926   return Result;
927 }
928 
929 //  Zero extend to a new width.
930 APInt APInt::zext(unsigned width) const {
931   assert(width > BitWidth && "Invalid APInt ZeroExtend request");
932 
933   if (width <= APINT_BITS_PER_WORD)
934     return APInt(width, U.VAL);
935 
936   APInt Result(getMemory(getNumWords(width)), width);
937 
938   // Copy words.
939   std::memcpy(Result.U.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE);
940 
941   // Zero remaining words.
942   std::memset(Result.U.pVal + getNumWords(), 0,
943               (Result.getNumWords() - getNumWords()) * APINT_WORD_SIZE);
944 
945   return Result;
946 }
947 
948 APInt APInt::zextOrTrunc(unsigned width) const {
949   if (BitWidth < width)
950     return zext(width);
951   if (BitWidth > width)
952     return trunc(width);
953   return *this;
954 }
955 
956 APInt APInt::sextOrTrunc(unsigned width) const {
957   if (BitWidth < width)
958     return sext(width);
959   if (BitWidth > width)
960     return trunc(width);
961   return *this;
962 }
963 
964 APInt APInt::zextOrSelf(unsigned width) const {
965   if (BitWidth < width)
966     return zext(width);
967   return *this;
968 }
969 
970 APInt APInt::sextOrSelf(unsigned width) const {
971   if (BitWidth < width)
972     return sext(width);
973   return *this;
974 }
975 
976 /// Arithmetic right-shift this APInt by shiftAmt.
977 /// Arithmetic right-shift function.
978 void APInt::ashrInPlace(const APInt &shiftAmt) {
979   ashrInPlace((unsigned)shiftAmt.getLimitedValue(BitWidth));
980 }
981 
982 /// Arithmetic right-shift this APInt by shiftAmt.
983 /// Arithmetic right-shift function.
984 void APInt::ashrSlowCase(unsigned ShiftAmt) {
985   // Don't bother performing a no-op shift.
986   if (!ShiftAmt)
987     return;
988 
989   // Save the original sign bit for later.
990   bool Negative = isNegative();
991 
992   // WordShift is the inter-part shift; BitShift is intra-part shift.
993   unsigned WordShift = ShiftAmt / APINT_BITS_PER_WORD;
994   unsigned BitShift = ShiftAmt % APINT_BITS_PER_WORD;
995 
996   unsigned WordsToMove = getNumWords() - WordShift;
997   if (WordsToMove != 0) {
998     // Sign extend the last word to fill in the unused bits.
999     U.pVal[getNumWords() - 1] = SignExtend64(
1000         U.pVal[getNumWords() - 1], ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1);
1001 
1002     // Fastpath for moving by whole words.
1003     if (BitShift == 0) {
1004       std::memmove(U.pVal, U.pVal + WordShift, WordsToMove * APINT_WORD_SIZE);
1005     } else {
1006       // Move the words containing significant bits.
1007       for (unsigned i = 0; i != WordsToMove - 1; ++i)
1008         U.pVal[i] = (U.pVal[i + WordShift] >> BitShift) |
1009                     (U.pVal[i + WordShift + 1] << (APINT_BITS_PER_WORD - BitShift));
1010 
1011       // Handle the last word which has no high bits to copy.
1012       U.pVal[WordsToMove - 1] = U.pVal[WordShift + WordsToMove - 1] >> BitShift;
1013       // Sign extend one more time.
1014       U.pVal[WordsToMove - 1] =
1015           SignExtend64(U.pVal[WordsToMove - 1], APINT_BITS_PER_WORD - BitShift);
1016     }
1017   }
1018 
1019   // Fill in the remainder based on the original sign.
1020   std::memset(U.pVal + WordsToMove, Negative ? -1 : 0,
1021               WordShift * APINT_WORD_SIZE);
1022   clearUnusedBits();
1023 }
1024 
1025 /// Logical right-shift this APInt by shiftAmt.
1026 /// Logical right-shift function.
1027 void APInt::lshrInPlace(const APInt &shiftAmt) {
1028   lshrInPlace((unsigned)shiftAmt.getLimitedValue(BitWidth));
1029 }
1030 
1031 /// Logical right-shift this APInt by shiftAmt.
1032 /// Logical right-shift function.
1033 void APInt::lshrSlowCase(unsigned ShiftAmt) {
1034   tcShiftRight(U.pVal, getNumWords(), ShiftAmt);
1035 }
1036 
1037 /// Left-shift this APInt by shiftAmt.
1038 /// Left-shift function.
1039 APInt &APInt::operator<<=(const APInt &shiftAmt) {
1040   // It's undefined behavior in C to shift by BitWidth or greater.
1041   *this <<= (unsigned)shiftAmt.getLimitedValue(BitWidth);
1042   return *this;
1043 }
1044 
1045 void APInt::shlSlowCase(unsigned ShiftAmt) {
1046   tcShiftLeft(U.pVal, getNumWords(), ShiftAmt);
1047   clearUnusedBits();
1048 }
1049 
1050 // Calculate the rotate amount modulo the bit width.
1051 static unsigned rotateModulo(unsigned BitWidth, const APInt &rotateAmt) {
1052   unsigned rotBitWidth = rotateAmt.getBitWidth();
1053   APInt rot = rotateAmt;
1054   if (rotBitWidth < BitWidth) {
1055     // Extend the rotate APInt, so that the urem doesn't divide by 0.
1056     // e.g. APInt(1, 32) would give APInt(1, 0).
1057     rot = rotateAmt.zext(BitWidth);
1058   }
1059   rot = rot.urem(APInt(rot.getBitWidth(), BitWidth));
1060   return rot.getLimitedValue(BitWidth);
1061 }
1062 
1063 APInt APInt::rotl(const APInt &rotateAmt) const {
1064   return rotl(rotateModulo(BitWidth, rotateAmt));
1065 }
1066 
1067 APInt APInt::rotl(unsigned rotateAmt) const {
1068   rotateAmt %= BitWidth;
1069   if (rotateAmt == 0)
1070     return *this;
1071   return shl(rotateAmt) | lshr(BitWidth - rotateAmt);
1072 }
1073 
1074 APInt APInt::rotr(const APInt &rotateAmt) const {
1075   return rotr(rotateModulo(BitWidth, rotateAmt));
1076 }
1077 
1078 APInt APInt::rotr(unsigned rotateAmt) const {
1079   rotateAmt %= BitWidth;
1080   if (rotateAmt == 0)
1081     return *this;
1082   return lshr(rotateAmt) | shl(BitWidth - rotateAmt);
1083 }
1084 
1085 // Square Root - this method computes and returns the square root of "this".
1086 // Three mechanisms are used for computation. For small values (<= 5 bits),
1087 // a table lookup is done. This gets some performance for common cases. For
1088 // values using less than 52 bits, the value is converted to double and then
1089 // the libc sqrt function is called. The result is rounded and then converted
1090 // back to a uint64_t which is then used to construct the result. Finally,
1091 // the Babylonian method for computing square roots is used.
1092 APInt APInt::sqrt() const {
1093 
1094   // Determine the magnitude of the value.
1095   unsigned magnitude = getActiveBits();
1096 
1097   // Use a fast table for some small values. This also gets rid of some
1098   // rounding errors in libc sqrt for small values.
1099   if (magnitude <= 5) {
1100     static const uint8_t results[32] = {
1101       /*     0 */ 0,
1102       /*  1- 2 */ 1, 1,
1103       /*  3- 6 */ 2, 2, 2, 2,
1104       /*  7-12 */ 3, 3, 3, 3, 3, 3,
1105       /* 13-20 */ 4, 4, 4, 4, 4, 4, 4, 4,
1106       /* 21-30 */ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1107       /*    31 */ 6
1108     };
1109     return APInt(BitWidth, results[ (isSingleWord() ? U.VAL : U.pVal[0]) ]);
1110   }
1111 
1112   // If the magnitude of the value fits in less than 52 bits (the precision of
1113   // an IEEE double precision floating point value), then we can use the
1114   // libc sqrt function which will probably use a hardware sqrt computation.
1115   // This should be faster than the algorithm below.
1116   if (magnitude < 52) {
1117     return APInt(BitWidth,
1118                  uint64_t(::round(::sqrt(double(isSingleWord() ? U.VAL
1119                                                                : U.pVal[0])))));
1120   }
1121 
1122   // Okay, all the short cuts are exhausted. We must compute it. The following
1123   // is a classical Babylonian method for computing the square root. This code
1124   // was adapted to APInt from a wikipedia article on such computations.
1125   // See http://www.wikipedia.org/ and go to the page named
1126   // Calculate_an_integer_square_root.
1127   unsigned nbits = BitWidth, i = 4;
1128   APInt testy(BitWidth, 16);
1129   APInt x_old(BitWidth, 1);
1130   APInt x_new(BitWidth, 0);
1131   APInt two(BitWidth, 2);
1132 
1133   // Select a good starting value using binary logarithms.
1134   for (;; i += 2, testy = testy.shl(2))
1135     if (i >= nbits || this->ule(testy)) {
1136       x_old = x_old.shl(i / 2);
1137       break;
1138     }
1139 
1140   // Use the Babylonian method to arrive at the integer square root:
1141   for (;;) {
1142     x_new = (this->udiv(x_old) + x_old).udiv(two);
1143     if (x_old.ule(x_new))
1144       break;
1145     x_old = x_new;
1146   }
1147 
1148   // Make sure we return the closest approximation
1149   // NOTE: The rounding calculation below is correct. It will produce an
1150   // off-by-one discrepancy with results from pari/gp. That discrepancy has been
1151   // determined to be a rounding issue with pari/gp as it begins to use a
1152   // floating point representation after 192 bits. There are no discrepancies
1153   // between this algorithm and pari/gp for bit widths < 192 bits.
1154   APInt square(x_old * x_old);
1155   APInt nextSquare((x_old + 1) * (x_old +1));
1156   if (this->ult(square))
1157     return x_old;
1158   assert(this->ule(nextSquare) && "Error in APInt::sqrt computation");
1159   APInt midpoint((nextSquare - square).udiv(two));
1160   APInt offset(*this - square);
1161   if (offset.ult(midpoint))
1162     return x_old;
1163   return x_old + 1;
1164 }
1165 
1166 /// Computes the multiplicative inverse of this APInt for a given modulo. The
1167 /// iterative extended Euclidean algorithm is used to solve for this value,
1168 /// however we simplify it to speed up calculating only the inverse, and take
1169 /// advantage of div+rem calculations. We also use some tricks to avoid copying
1170 /// (potentially large) APInts around.
1171 /// WARNING: a value of '0' may be returned,
1172 ///          signifying that no multiplicative inverse exists!
1173 APInt APInt::multiplicativeInverse(const APInt& modulo) const {
1174   assert(ult(modulo) && "This APInt must be smaller than the modulo");
1175 
1176   // Using the properties listed at the following web page (accessed 06/21/08):
1177   //   http://www.numbertheory.org/php/euclid.html
1178   // (especially the properties numbered 3, 4 and 9) it can be proved that
1179   // BitWidth bits suffice for all the computations in the algorithm implemented
1180   // below. More precisely, this number of bits suffice if the multiplicative
1181   // inverse exists, but may not suffice for the general extended Euclidean
1182   // algorithm.
1183 
1184   APInt r[2] = { modulo, *this };
1185   APInt t[2] = { APInt(BitWidth, 0), APInt(BitWidth, 1) };
1186   APInt q(BitWidth, 0);
1187 
1188   unsigned i;
1189   for (i = 0; r[i^1] != 0; i ^= 1) {
1190     // An overview of the math without the confusing bit-flipping:
1191     // q = r[i-2] / r[i-1]
1192     // r[i] = r[i-2] % r[i-1]
1193     // t[i] = t[i-2] - t[i-1] * q
1194     udivrem(r[i], r[i^1], q, r[i]);
1195     t[i] -= t[i^1] * q;
1196   }
1197 
1198   // If this APInt and the modulo are not coprime, there is no multiplicative
1199   // inverse, so return 0. We check this by looking at the next-to-last
1200   // remainder, which is the gcd(*this,modulo) as calculated by the Euclidean
1201   // algorithm.
1202   if (r[i] != 1)
1203     return APInt(BitWidth, 0);
1204 
1205   // The next-to-last t is the multiplicative inverse.  However, we are
1206   // interested in a positive inverse. Calculate a positive one from a negative
1207   // one if necessary. A simple addition of the modulo suffices because
1208   // abs(t[i]) is known to be less than *this/2 (see the link above).
1209   if (t[i].isNegative())
1210     t[i] += modulo;
1211 
1212   return std::move(t[i]);
1213 }
1214 
1215 /// Calculate the magic numbers required to implement a signed integer division
1216 /// by a constant as a sequence of multiplies, adds and shifts.  Requires that
1217 /// the divisor not be 0, 1, or -1.  Taken from "Hacker's Delight", Henry S.
1218 /// Warren, Jr., chapter 10.
1219 APInt::ms APInt::magic() const {
1220   const APInt& d = *this;
1221   unsigned p;
1222   APInt ad, anc, delta, q1, r1, q2, r2, t;
1223   APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
1224   struct ms mag;
1225 
1226   ad = d.abs();
1227   t = signedMin + (d.lshr(d.getBitWidth() - 1));
1228   anc = t - 1 - t.urem(ad);   // absolute value of nc
1229   p = d.getBitWidth() - 1;    // initialize p
1230   q1 = signedMin.udiv(anc);   // initialize q1 = 2p/abs(nc)
1231   r1 = signedMin - q1*anc;    // initialize r1 = rem(2p,abs(nc))
1232   q2 = signedMin.udiv(ad);    // initialize q2 = 2p/abs(d)
1233   r2 = signedMin - q2*ad;     // initialize r2 = rem(2p,abs(d))
1234   do {
1235     p = p + 1;
1236     q1 = q1<<1;          // update q1 = 2p/abs(nc)
1237     r1 = r1<<1;          // update r1 = rem(2p/abs(nc))
1238     if (r1.uge(anc)) {  // must be unsigned comparison
1239       q1 = q1 + 1;
1240       r1 = r1 - anc;
1241     }
1242     q2 = q2<<1;          // update q2 = 2p/abs(d)
1243     r2 = r2<<1;          // update r2 = rem(2p/abs(d))
1244     if (r2.uge(ad)) {   // must be unsigned comparison
1245       q2 = q2 + 1;
1246       r2 = r2 - ad;
1247     }
1248     delta = ad - r2;
1249   } while (q1.ult(delta) || (q1 == delta && r1 == 0));
1250 
1251   mag.m = q2 + 1;
1252   if (d.isNegative()) mag.m = -mag.m;   // resulting magic number
1253   mag.s = p - d.getBitWidth();          // resulting shift
1254   return mag;
1255 }
1256 
1257 /// Calculate the magic numbers required to implement an unsigned integer
1258 /// division by a constant as a sequence of multiplies, adds and shifts.
1259 /// Requires that the divisor not be 0.  Taken from "Hacker's Delight", Henry
1260 /// S. Warren, Jr., chapter 10.
1261 /// LeadingZeros can be used to simplify the calculation if the upper bits
1262 /// of the divided value are known zero.
1263 APInt::mu APInt::magicu(unsigned LeadingZeros) const {
1264   const APInt& d = *this;
1265   unsigned p;
1266   APInt nc, delta, q1, r1, q2, r2;
1267   struct mu magu;
1268   magu.a = 0;               // initialize "add" indicator
1269   APInt allOnes = APInt::getAllOnesValue(d.getBitWidth()).lshr(LeadingZeros);
1270   APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
1271   APInt signedMax = APInt::getSignedMaxValue(d.getBitWidth());
1272 
1273   nc = allOnes - (allOnes - d).urem(d);
1274   p = d.getBitWidth() - 1;  // initialize p
1275   q1 = signedMin.udiv(nc);  // initialize q1 = 2p/nc
1276   r1 = signedMin - q1*nc;   // initialize r1 = rem(2p,nc)
1277   q2 = signedMax.udiv(d);   // initialize q2 = (2p-1)/d
1278   r2 = signedMax - q2*d;    // initialize r2 = rem((2p-1),d)
1279   do {
1280     p = p + 1;
1281     if (r1.uge(nc - r1)) {
1282       q1 = q1 + q1 + 1;  // update q1
1283       r1 = r1 + r1 - nc; // update r1
1284     }
1285     else {
1286       q1 = q1+q1; // update q1
1287       r1 = r1+r1; // update r1
1288     }
1289     if ((r2 + 1).uge(d - r2)) {
1290       if (q2.uge(signedMax)) magu.a = 1;
1291       q2 = q2+q2 + 1;     // update q2
1292       r2 = r2+r2 + 1 - d; // update r2
1293     }
1294     else {
1295       if (q2.uge(signedMin)) magu.a = 1;
1296       q2 = q2+q2;     // update q2
1297       r2 = r2+r2 + 1; // update r2
1298     }
1299     delta = d - 1 - r2;
1300   } while (p < d.getBitWidth()*2 &&
1301            (q1.ult(delta) || (q1 == delta && r1 == 0)));
1302   magu.m = q2 + 1; // resulting magic number
1303   magu.s = p - d.getBitWidth();  // resulting shift
1304   return magu;
1305 }
1306 
1307 /// Implementation of Knuth's Algorithm D (Division of nonnegative integers)
1308 /// from "Art of Computer Programming, Volume 2", section 4.3.1, p. 272. The
1309 /// variables here have the same names as in the algorithm. Comments explain
1310 /// the algorithm and any deviation from it.
1311 static void KnuthDiv(uint32_t *u, uint32_t *v, uint32_t *q, uint32_t* r,
1312                      unsigned m, unsigned n) {
1313   assert(u && "Must provide dividend");
1314   assert(v && "Must provide divisor");
1315   assert(q && "Must provide quotient");
1316   assert(u != v && u != q && v != q && "Must use different memory");
1317   assert(n>1 && "n must be > 1");
1318 
1319   // b denotes the base of the number system. In our case b is 2^32.
1320   const uint64_t b = uint64_t(1) << 32;
1321 
1322 // The DEBUG macros here tend to be spam in the debug output if you're not
1323 // debugging this code. Disable them unless KNUTH_DEBUG is defined.
1324 #ifdef KNUTH_DEBUG
1325 #define DEBUG_KNUTH(X) LLVM_DEBUG(X)
1326 #else
1327 #define DEBUG_KNUTH(X) do {} while(false)
1328 #endif
1329 
1330   DEBUG_KNUTH(dbgs() << "KnuthDiv: m=" << m << " n=" << n << '\n');
1331   DEBUG_KNUTH(dbgs() << "KnuthDiv: original:");
1332   DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1333   DEBUG_KNUTH(dbgs() << " by");
1334   DEBUG_KNUTH(for (int i = n; i > 0; i--) dbgs() << " " << v[i - 1]);
1335   DEBUG_KNUTH(dbgs() << '\n');
1336   // D1. [Normalize.] Set d = b / (v[n-1] + 1) and multiply all the digits of
1337   // u and v by d. Note that we have taken Knuth's advice here to use a power
1338   // of 2 value for d such that d * v[n-1] >= b/2 (b is the base). A power of
1339   // 2 allows us to shift instead of multiply and it is easy to determine the
1340   // shift amount from the leading zeros.  We are basically normalizing the u
1341   // and v so that its high bits are shifted to the top of v's range without
1342   // overflow. Note that this can require an extra word in u so that u must
1343   // be of length m+n+1.
1344   unsigned shift = countLeadingZeros(v[n-1]);
1345   uint32_t v_carry = 0;
1346   uint32_t u_carry = 0;
1347   if (shift) {
1348     for (unsigned i = 0; i < m+n; ++i) {
1349       uint32_t u_tmp = u[i] >> (32 - shift);
1350       u[i] = (u[i] << shift) | u_carry;
1351       u_carry = u_tmp;
1352     }
1353     for (unsigned i = 0; i < n; ++i) {
1354       uint32_t v_tmp = v[i] >> (32 - shift);
1355       v[i] = (v[i] << shift) | v_carry;
1356       v_carry = v_tmp;
1357     }
1358   }
1359   u[m+n] = u_carry;
1360 
1361   DEBUG_KNUTH(dbgs() << "KnuthDiv:   normal:");
1362   DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1363   DEBUG_KNUTH(dbgs() << " by");
1364   DEBUG_KNUTH(for (int i = n; i > 0; i--) dbgs() << " " << v[i - 1]);
1365   DEBUG_KNUTH(dbgs() << '\n');
1366 
1367   // D2. [Initialize j.]  Set j to m. This is the loop counter over the places.
1368   int j = m;
1369   do {
1370     DEBUG_KNUTH(dbgs() << "KnuthDiv: quotient digit #" << j << '\n');
1371     // D3. [Calculate q'.].
1372     //     Set qp = (u[j+n]*b + u[j+n-1]) / v[n-1]. (qp=qprime=q')
1373     //     Set rp = (u[j+n]*b + u[j+n-1]) % v[n-1]. (rp=rprime=r')
1374     // Now test if qp == b or qp*v[n-2] > b*rp + u[j+n-2]; if so, decrease
1375     // qp by 1, increase rp by v[n-1], and repeat this test if rp < b. The test
1376     // on v[n-2] determines at high speed most of the cases in which the trial
1377     // value qp is one too large, and it eliminates all cases where qp is two
1378     // too large.
1379     uint64_t dividend = Make_64(u[j+n], u[j+n-1]);
1380     DEBUG_KNUTH(dbgs() << "KnuthDiv: dividend == " << dividend << '\n');
1381     uint64_t qp = dividend / v[n-1];
1382     uint64_t rp = dividend % v[n-1];
1383     if (qp == b || qp*v[n-2] > b*rp + u[j+n-2]) {
1384       qp--;
1385       rp += v[n-1];
1386       if (rp < b && (qp == b || qp*v[n-2] > b*rp + u[j+n-2]))
1387         qp--;
1388     }
1389     DEBUG_KNUTH(dbgs() << "KnuthDiv: qp == " << qp << ", rp == " << rp << '\n');
1390 
1391     // D4. [Multiply and subtract.] Replace (u[j+n]u[j+n-1]...u[j]) with
1392     // (u[j+n]u[j+n-1]..u[j]) - qp * (v[n-1]...v[1]v[0]). This computation
1393     // consists of a simple multiplication by a one-place number, combined with
1394     // a subtraction.
1395     // The digits (u[j+n]...u[j]) should be kept positive; if the result of
1396     // this step is actually negative, (u[j+n]...u[j]) should be left as the
1397     // true value plus b**(n+1), namely as the b's complement of
1398     // the true value, and a "borrow" to the left should be remembered.
1399     int64_t borrow = 0;
1400     for (unsigned i = 0; i < n; ++i) {
1401       uint64_t p = uint64_t(qp) * uint64_t(v[i]);
1402       int64_t subres = int64_t(u[j+i]) - borrow - Lo_32(p);
1403       u[j+i] = Lo_32(subres);
1404       borrow = Hi_32(p) - Hi_32(subres);
1405       DEBUG_KNUTH(dbgs() << "KnuthDiv: u[j+i] = " << u[j + i]
1406                         << ", borrow = " << borrow << '\n');
1407     }
1408     bool isNeg = u[j+n] < borrow;
1409     u[j+n] -= Lo_32(borrow);
1410 
1411     DEBUG_KNUTH(dbgs() << "KnuthDiv: after subtraction:");
1412     DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1413     DEBUG_KNUTH(dbgs() << '\n');
1414 
1415     // D5. [Test remainder.] Set q[j] = qp. If the result of step D4 was
1416     // negative, go to step D6; otherwise go on to step D7.
1417     q[j] = Lo_32(qp);
1418     if (isNeg) {
1419       // D6. [Add back]. The probability that this step is necessary is very
1420       // small, on the order of only 2/b. Make sure that test data accounts for
1421       // this possibility. Decrease q[j] by 1
1422       q[j]--;
1423       // and add (0v[n-1]...v[1]v[0]) to (u[j+n]u[j+n-1]...u[j+1]u[j]).
1424       // A carry will occur to the left of u[j+n], and it should be ignored
1425       // since it cancels with the borrow that occurred in D4.
1426       bool carry = false;
1427       for (unsigned i = 0; i < n; i++) {
1428         uint32_t limit = std::min(u[j+i],v[i]);
1429         u[j+i] += v[i] + carry;
1430         carry = u[j+i] < limit || (carry && u[j+i] == limit);
1431       }
1432       u[j+n] += carry;
1433     }
1434     DEBUG_KNUTH(dbgs() << "KnuthDiv: after correction:");
1435     DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1436     DEBUG_KNUTH(dbgs() << "\nKnuthDiv: digit result = " << q[j] << '\n');
1437 
1438     // D7. [Loop on j.]  Decrease j by one. Now if j >= 0, go back to D3.
1439   } while (--j >= 0);
1440 
1441   DEBUG_KNUTH(dbgs() << "KnuthDiv: quotient:");
1442   DEBUG_KNUTH(for (int i = m; i >= 0; i--) dbgs() << " " << q[i]);
1443   DEBUG_KNUTH(dbgs() << '\n');
1444 
1445   // D8. [Unnormalize]. Now q[...] is the desired quotient, and the desired
1446   // remainder may be obtained by dividing u[...] by d. If r is non-null we
1447   // compute the remainder (urem uses this).
1448   if (r) {
1449     // The value d is expressed by the "shift" value above since we avoided
1450     // multiplication by d by using a shift left. So, all we have to do is
1451     // shift right here.
1452     if (shift) {
1453       uint32_t carry = 0;
1454       DEBUG_KNUTH(dbgs() << "KnuthDiv: remainder:");
1455       for (int i = n-1; i >= 0; i--) {
1456         r[i] = (u[i] >> shift) | carry;
1457         carry = u[i] << (32 - shift);
1458         DEBUG_KNUTH(dbgs() << " " << r[i]);
1459       }
1460     } else {
1461       for (int i = n-1; i >= 0; i--) {
1462         r[i] = u[i];
1463         DEBUG_KNUTH(dbgs() << " " << r[i]);
1464       }
1465     }
1466     DEBUG_KNUTH(dbgs() << '\n');
1467   }
1468   DEBUG_KNUTH(dbgs() << '\n');
1469 }
1470 
1471 void APInt::divide(const WordType *LHS, unsigned lhsWords, const WordType *RHS,
1472                    unsigned rhsWords, WordType *Quotient, WordType *Remainder) {
1473   assert(lhsWords >= rhsWords && "Fractional result");
1474 
1475   // First, compose the values into an array of 32-bit words instead of
1476   // 64-bit words. This is a necessity of both the "short division" algorithm
1477   // and the Knuth "classical algorithm" which requires there to be native
1478   // operations for +, -, and * on an m bit value with an m*2 bit result. We
1479   // can't use 64-bit operands here because we don't have native results of
1480   // 128-bits. Furthermore, casting the 64-bit values to 32-bit values won't
1481   // work on large-endian machines.
1482   unsigned n = rhsWords * 2;
1483   unsigned m = (lhsWords * 2) - n;
1484 
1485   // Allocate space for the temporary values we need either on the stack, if
1486   // it will fit, or on the heap if it won't.
1487   uint32_t SPACE[128];
1488   uint32_t *U = nullptr;
1489   uint32_t *V = nullptr;
1490   uint32_t *Q = nullptr;
1491   uint32_t *R = nullptr;
1492   if ((Remainder?4:3)*n+2*m+1 <= 128) {
1493     U = &SPACE[0];
1494     V = &SPACE[m+n+1];
1495     Q = &SPACE[(m+n+1) + n];
1496     if (Remainder)
1497       R = &SPACE[(m+n+1) + n + (m+n)];
1498   } else {
1499     U = new uint32_t[m + n + 1];
1500     V = new uint32_t[n];
1501     Q = new uint32_t[m+n];
1502     if (Remainder)
1503       R = new uint32_t[n];
1504   }
1505 
1506   // Initialize the dividend
1507   memset(U, 0, (m+n+1)*sizeof(uint32_t));
1508   for (unsigned i = 0; i < lhsWords; ++i) {
1509     uint64_t tmp = LHS[i];
1510     U[i * 2] = Lo_32(tmp);
1511     U[i * 2 + 1] = Hi_32(tmp);
1512   }
1513   U[m+n] = 0; // this extra word is for "spill" in the Knuth algorithm.
1514 
1515   // Initialize the divisor
1516   memset(V, 0, (n)*sizeof(uint32_t));
1517   for (unsigned i = 0; i < rhsWords; ++i) {
1518     uint64_t tmp = RHS[i];
1519     V[i * 2] = Lo_32(tmp);
1520     V[i * 2 + 1] = Hi_32(tmp);
1521   }
1522 
1523   // initialize the quotient and remainder
1524   memset(Q, 0, (m+n) * sizeof(uint32_t));
1525   if (Remainder)
1526     memset(R, 0, n * sizeof(uint32_t));
1527 
1528   // Now, adjust m and n for the Knuth division. n is the number of words in
1529   // the divisor. m is the number of words by which the dividend exceeds the
1530   // divisor (i.e. m+n is the length of the dividend). These sizes must not
1531   // contain any zero words or the Knuth algorithm fails.
1532   for (unsigned i = n; i > 0 && V[i-1] == 0; i--) {
1533     n--;
1534     m++;
1535   }
1536   for (unsigned i = m+n; i > 0 && U[i-1] == 0; i--)
1537     m--;
1538 
1539   // If we're left with only a single word for the divisor, Knuth doesn't work
1540   // so we implement the short division algorithm here. This is much simpler
1541   // and faster because we are certain that we can divide a 64-bit quantity
1542   // by a 32-bit quantity at hardware speed and short division is simply a
1543   // series of such operations. This is just like doing short division but we
1544   // are using base 2^32 instead of base 10.
1545   assert(n != 0 && "Divide by zero?");
1546   if (n == 1) {
1547     uint32_t divisor = V[0];
1548     uint32_t remainder = 0;
1549     for (int i = m; i >= 0; i--) {
1550       uint64_t partial_dividend = Make_64(remainder, U[i]);
1551       if (partial_dividend == 0) {
1552         Q[i] = 0;
1553         remainder = 0;
1554       } else if (partial_dividend < divisor) {
1555         Q[i] = 0;
1556         remainder = Lo_32(partial_dividend);
1557       } else if (partial_dividend == divisor) {
1558         Q[i] = 1;
1559         remainder = 0;
1560       } else {
1561         Q[i] = Lo_32(partial_dividend / divisor);
1562         remainder = Lo_32(partial_dividend - (Q[i] * divisor));
1563       }
1564     }
1565     if (R)
1566       R[0] = remainder;
1567   } else {
1568     // Now we're ready to invoke the Knuth classical divide algorithm. In this
1569     // case n > 1.
1570     KnuthDiv(U, V, Q, R, m, n);
1571   }
1572 
1573   // If the caller wants the quotient
1574   if (Quotient) {
1575     for (unsigned i = 0; i < lhsWords; ++i)
1576       Quotient[i] = Make_64(Q[i*2+1], Q[i*2]);
1577   }
1578 
1579   // If the caller wants the remainder
1580   if (Remainder) {
1581     for (unsigned i = 0; i < rhsWords; ++i)
1582       Remainder[i] = Make_64(R[i*2+1], R[i*2]);
1583   }
1584 
1585   // Clean up the memory we allocated.
1586   if (U != &SPACE[0]) {
1587     delete [] U;
1588     delete [] V;
1589     delete [] Q;
1590     delete [] R;
1591   }
1592 }
1593 
1594 APInt APInt::udiv(const APInt &RHS) const {
1595   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
1596 
1597   // First, deal with the easy case
1598   if (isSingleWord()) {
1599     assert(RHS.U.VAL != 0 && "Divide by zero?");
1600     return APInt(BitWidth, U.VAL / RHS.U.VAL);
1601   }
1602 
1603   // Get some facts about the LHS and RHS number of bits and words
1604   unsigned lhsWords = getNumWords(getActiveBits());
1605   unsigned rhsBits  = RHS.getActiveBits();
1606   unsigned rhsWords = getNumWords(rhsBits);
1607   assert(rhsWords && "Divided by zero???");
1608 
1609   // Deal with some degenerate cases
1610   if (!lhsWords)
1611     // 0 / X ===> 0
1612     return APInt(BitWidth, 0);
1613   if (rhsBits == 1)
1614     // X / 1 ===> X
1615     return *this;
1616   if (lhsWords < rhsWords || this->ult(RHS))
1617     // X / Y ===> 0, iff X < Y
1618     return APInt(BitWidth, 0);
1619   if (*this == RHS)
1620     // X / X ===> 1
1621     return APInt(BitWidth, 1);
1622   if (lhsWords == 1) // rhsWords is 1 if lhsWords is 1.
1623     // All high words are zero, just use native divide
1624     return APInt(BitWidth, this->U.pVal[0] / RHS.U.pVal[0]);
1625 
1626   // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1627   APInt Quotient(BitWidth, 0); // to hold result.
1628   divide(U.pVal, lhsWords, RHS.U.pVal, rhsWords, Quotient.U.pVal, nullptr);
1629   return Quotient;
1630 }
1631 
1632 APInt APInt::udiv(uint64_t RHS) const {
1633   assert(RHS != 0 && "Divide by zero?");
1634 
1635   // First, deal with the easy case
1636   if (isSingleWord())
1637     return APInt(BitWidth, U.VAL / RHS);
1638 
1639   // Get some facts about the LHS words.
1640   unsigned lhsWords = getNumWords(getActiveBits());
1641 
1642   // Deal with some degenerate cases
1643   if (!lhsWords)
1644     // 0 / X ===> 0
1645     return APInt(BitWidth, 0);
1646   if (RHS == 1)
1647     // X / 1 ===> X
1648     return *this;
1649   if (this->ult(RHS))
1650     // X / Y ===> 0, iff X < Y
1651     return APInt(BitWidth, 0);
1652   if (*this == RHS)
1653     // X / X ===> 1
1654     return APInt(BitWidth, 1);
1655   if (lhsWords == 1) // rhsWords is 1 if lhsWords is 1.
1656     // All high words are zero, just use native divide
1657     return APInt(BitWidth, this->U.pVal[0] / RHS);
1658 
1659   // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1660   APInt Quotient(BitWidth, 0); // to hold result.
1661   divide(U.pVal, lhsWords, &RHS, 1, Quotient.U.pVal, nullptr);
1662   return Quotient;
1663 }
1664 
1665 APInt APInt::sdiv(const APInt &RHS) const {
1666   if (isNegative()) {
1667     if (RHS.isNegative())
1668       return (-(*this)).udiv(-RHS);
1669     return -((-(*this)).udiv(RHS));
1670   }
1671   if (RHS.isNegative())
1672     return -(this->udiv(-RHS));
1673   return this->udiv(RHS);
1674 }
1675 
1676 APInt APInt::sdiv(int64_t RHS) const {
1677   if (isNegative()) {
1678     if (RHS < 0)
1679       return (-(*this)).udiv(-RHS);
1680     return -((-(*this)).udiv(RHS));
1681   }
1682   if (RHS < 0)
1683     return -(this->udiv(-RHS));
1684   return this->udiv(RHS);
1685 }
1686 
1687 APInt APInt::urem(const APInt &RHS) const {
1688   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
1689   if (isSingleWord()) {
1690     assert(RHS.U.VAL != 0 && "Remainder by zero?");
1691     return APInt(BitWidth, U.VAL % RHS.U.VAL);
1692   }
1693 
1694   // Get some facts about the LHS
1695   unsigned lhsWords = getNumWords(getActiveBits());
1696 
1697   // Get some facts about the RHS
1698   unsigned rhsBits = RHS.getActiveBits();
1699   unsigned rhsWords = getNumWords(rhsBits);
1700   assert(rhsWords && "Performing remainder operation by zero ???");
1701 
1702   // Check the degenerate cases
1703   if (lhsWords == 0)
1704     // 0 % Y ===> 0
1705     return APInt(BitWidth, 0);
1706   if (rhsBits == 1)
1707     // X % 1 ===> 0
1708     return APInt(BitWidth, 0);
1709   if (lhsWords < rhsWords || this->ult(RHS))
1710     // X % Y ===> X, iff X < Y
1711     return *this;
1712   if (*this == RHS)
1713     // X % X == 0;
1714     return APInt(BitWidth, 0);
1715   if (lhsWords == 1)
1716     // All high words are zero, just use native remainder
1717     return APInt(BitWidth, U.pVal[0] % RHS.U.pVal[0]);
1718 
1719   // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1720   APInt Remainder(BitWidth, 0);
1721   divide(U.pVal, lhsWords, RHS.U.pVal, rhsWords, nullptr, Remainder.U.pVal);
1722   return Remainder;
1723 }
1724 
1725 uint64_t APInt::urem(uint64_t RHS) const {
1726   assert(RHS != 0 && "Remainder by zero?");
1727 
1728   if (isSingleWord())
1729     return U.VAL % RHS;
1730 
1731   // Get some facts about the LHS
1732   unsigned lhsWords = getNumWords(getActiveBits());
1733 
1734   // Check the degenerate cases
1735   if (lhsWords == 0)
1736     // 0 % Y ===> 0
1737     return 0;
1738   if (RHS == 1)
1739     // X % 1 ===> 0
1740     return 0;
1741   if (this->ult(RHS))
1742     // X % Y ===> X, iff X < Y
1743     return getZExtValue();
1744   if (*this == RHS)
1745     // X % X == 0;
1746     return 0;
1747   if (lhsWords == 1)
1748     // All high words are zero, just use native remainder
1749     return U.pVal[0] % RHS;
1750 
1751   // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1752   uint64_t Remainder;
1753   divide(U.pVal, lhsWords, &RHS, 1, nullptr, &Remainder);
1754   return Remainder;
1755 }
1756 
1757 APInt APInt::srem(const APInt &RHS) const {
1758   if (isNegative()) {
1759     if (RHS.isNegative())
1760       return -((-(*this)).urem(-RHS));
1761     return -((-(*this)).urem(RHS));
1762   }
1763   if (RHS.isNegative())
1764     return this->urem(-RHS);
1765   return this->urem(RHS);
1766 }
1767 
1768 int64_t APInt::srem(int64_t RHS) const {
1769   if (isNegative()) {
1770     if (RHS < 0)
1771       return -((-(*this)).urem(-RHS));
1772     return -((-(*this)).urem(RHS));
1773   }
1774   if (RHS < 0)
1775     return this->urem(-RHS);
1776   return this->urem(RHS);
1777 }
1778 
1779 void APInt::udivrem(const APInt &LHS, const APInt &RHS,
1780                     APInt &Quotient, APInt &Remainder) {
1781   assert(LHS.BitWidth == RHS.BitWidth && "Bit widths must be the same");
1782   unsigned BitWidth = LHS.BitWidth;
1783 
1784   // First, deal with the easy case
1785   if (LHS.isSingleWord()) {
1786     assert(RHS.U.VAL != 0 && "Divide by zero?");
1787     uint64_t QuotVal = LHS.U.VAL / RHS.U.VAL;
1788     uint64_t RemVal = LHS.U.VAL % RHS.U.VAL;
1789     Quotient = APInt(BitWidth, QuotVal);
1790     Remainder = APInt(BitWidth, RemVal);
1791     return;
1792   }
1793 
1794   // Get some size facts about the dividend and divisor
1795   unsigned lhsWords = getNumWords(LHS.getActiveBits());
1796   unsigned rhsBits  = RHS.getActiveBits();
1797   unsigned rhsWords = getNumWords(rhsBits);
1798   assert(rhsWords && "Performing divrem operation by zero ???");
1799 
1800   // Check the degenerate cases
1801   if (lhsWords == 0) {
1802     Quotient = APInt(BitWidth, 0);    // 0 / Y ===> 0
1803     Remainder = APInt(BitWidth, 0);   // 0 % Y ===> 0
1804     return;
1805   }
1806 
1807   if (rhsBits == 1) {
1808     Quotient = LHS;                   // X / 1 ===> X
1809     Remainder = APInt(BitWidth, 0);   // X % 1 ===> 0
1810   }
1811 
1812   if (lhsWords < rhsWords || LHS.ult(RHS)) {
1813     Remainder = LHS;                  // X % Y ===> X, iff X < Y
1814     Quotient = APInt(BitWidth, 0);    // X / Y ===> 0, iff X < Y
1815     return;
1816   }
1817 
1818   if (LHS == RHS) {
1819     Quotient  = APInt(BitWidth, 1);   // X / X ===> 1
1820     Remainder = APInt(BitWidth, 0);   // X % X ===> 0;
1821     return;
1822   }
1823 
1824   // Make sure there is enough space to hold the results.
1825   // NOTE: This assumes that reallocate won't affect any bits if it doesn't
1826   // change the size. This is necessary if Quotient or Remainder is aliased
1827   // with LHS or RHS.
1828   Quotient.reallocate(BitWidth);
1829   Remainder.reallocate(BitWidth);
1830 
1831   if (lhsWords == 1) { // rhsWords is 1 if lhsWords is 1.
1832     // There is only one word to consider so use the native versions.
1833     uint64_t lhsValue = LHS.U.pVal[0];
1834     uint64_t rhsValue = RHS.U.pVal[0];
1835     Quotient = lhsValue / rhsValue;
1836     Remainder = lhsValue % rhsValue;
1837     return;
1838   }
1839 
1840   // Okay, lets do it the long way
1841   divide(LHS.U.pVal, lhsWords, RHS.U.pVal, rhsWords, Quotient.U.pVal,
1842          Remainder.U.pVal);
1843   // Clear the rest of the Quotient and Remainder.
1844   std::memset(Quotient.U.pVal + lhsWords, 0,
1845               (getNumWords(BitWidth) - lhsWords) * APINT_WORD_SIZE);
1846   std::memset(Remainder.U.pVal + rhsWords, 0,
1847               (getNumWords(BitWidth) - rhsWords) * APINT_WORD_SIZE);
1848 }
1849 
1850 void APInt::udivrem(const APInt &LHS, uint64_t RHS, APInt &Quotient,
1851                     uint64_t &Remainder) {
1852   assert(RHS != 0 && "Divide by zero?");
1853   unsigned BitWidth = LHS.BitWidth;
1854 
1855   // First, deal with the easy case
1856   if (LHS.isSingleWord()) {
1857     uint64_t QuotVal = LHS.U.VAL / RHS;
1858     Remainder = LHS.U.VAL % RHS;
1859     Quotient = APInt(BitWidth, QuotVal);
1860     return;
1861   }
1862 
1863   // Get some size facts about the dividend and divisor
1864   unsigned lhsWords = getNumWords(LHS.getActiveBits());
1865 
1866   // Check the degenerate cases
1867   if (lhsWords == 0) {
1868     Quotient = APInt(BitWidth, 0);    // 0 / Y ===> 0
1869     Remainder = 0;                    // 0 % Y ===> 0
1870     return;
1871   }
1872 
1873   if (RHS == 1) {
1874     Quotient = LHS;                   // X / 1 ===> X
1875     Remainder = 0;                    // X % 1 ===> 0
1876     return;
1877   }
1878 
1879   if (LHS.ult(RHS)) {
1880     Remainder = LHS.getZExtValue();   // X % Y ===> X, iff X < Y
1881     Quotient = APInt(BitWidth, 0);    // X / Y ===> 0, iff X < Y
1882     return;
1883   }
1884 
1885   if (LHS == RHS) {
1886     Quotient  = APInt(BitWidth, 1);   // X / X ===> 1
1887     Remainder = 0;                    // X % X ===> 0;
1888     return;
1889   }
1890 
1891   // Make sure there is enough space to hold the results.
1892   // NOTE: This assumes that reallocate won't affect any bits if it doesn't
1893   // change the size. This is necessary if Quotient is aliased with LHS.
1894   Quotient.reallocate(BitWidth);
1895 
1896   if (lhsWords == 1) { // rhsWords is 1 if lhsWords is 1.
1897     // There is only one word to consider so use the native versions.
1898     uint64_t lhsValue = LHS.U.pVal[0];
1899     Quotient = lhsValue / RHS;
1900     Remainder = lhsValue % RHS;
1901     return;
1902   }
1903 
1904   // Okay, lets do it the long way
1905   divide(LHS.U.pVal, lhsWords, &RHS, 1, Quotient.U.pVal, &Remainder);
1906   // Clear the rest of the Quotient.
1907   std::memset(Quotient.U.pVal + lhsWords, 0,
1908               (getNumWords(BitWidth) - lhsWords) * APINT_WORD_SIZE);
1909 }
1910 
1911 void APInt::sdivrem(const APInt &LHS, const APInt &RHS,
1912                     APInt &Quotient, APInt &Remainder) {
1913   if (LHS.isNegative()) {
1914     if (RHS.isNegative())
1915       APInt::udivrem(-LHS, -RHS, Quotient, Remainder);
1916     else {
1917       APInt::udivrem(-LHS, RHS, Quotient, Remainder);
1918       Quotient.negate();
1919     }
1920     Remainder.negate();
1921   } else if (RHS.isNegative()) {
1922     APInt::udivrem(LHS, -RHS, Quotient, Remainder);
1923     Quotient.negate();
1924   } else {
1925     APInt::udivrem(LHS, RHS, Quotient, Remainder);
1926   }
1927 }
1928 
1929 void APInt::sdivrem(const APInt &LHS, int64_t RHS,
1930                     APInt &Quotient, int64_t &Remainder) {
1931   uint64_t R = Remainder;
1932   if (LHS.isNegative()) {
1933     if (RHS < 0)
1934       APInt::udivrem(-LHS, -RHS, Quotient, R);
1935     else {
1936       APInt::udivrem(-LHS, RHS, Quotient, R);
1937       Quotient.negate();
1938     }
1939     R = -R;
1940   } else if (RHS < 0) {
1941     APInt::udivrem(LHS, -RHS, Quotient, R);
1942     Quotient.negate();
1943   } else {
1944     APInt::udivrem(LHS, RHS, Quotient, R);
1945   }
1946   Remainder = R;
1947 }
1948 
1949 APInt APInt::sadd_ov(const APInt &RHS, bool &Overflow) const {
1950   APInt Res = *this+RHS;
1951   Overflow = isNonNegative() == RHS.isNonNegative() &&
1952              Res.isNonNegative() != isNonNegative();
1953   return Res;
1954 }
1955 
1956 APInt APInt::uadd_ov(const APInt &RHS, bool &Overflow) const {
1957   APInt Res = *this+RHS;
1958   Overflow = Res.ult(RHS);
1959   return Res;
1960 }
1961 
1962 APInt APInt::ssub_ov(const APInt &RHS, bool &Overflow) const {
1963   APInt Res = *this - RHS;
1964   Overflow = isNonNegative() != RHS.isNonNegative() &&
1965              Res.isNonNegative() != isNonNegative();
1966   return Res;
1967 }
1968 
1969 APInt APInt::usub_ov(const APInt &RHS, bool &Overflow) const {
1970   APInt Res = *this-RHS;
1971   Overflow = Res.ugt(*this);
1972   return Res;
1973 }
1974 
1975 APInt APInt::sdiv_ov(const APInt &RHS, bool &Overflow) const {
1976   // MININT/-1  -->  overflow.
1977   Overflow = isMinSignedValue() && RHS.isAllOnesValue();
1978   return sdiv(RHS);
1979 }
1980 
1981 APInt APInt::smul_ov(const APInt &RHS, bool &Overflow) const {
1982   APInt Res = *this * RHS;
1983 
1984   if (*this != 0 && RHS != 0)
1985     Overflow = Res.sdiv(RHS) != *this || Res.sdiv(*this) != RHS;
1986   else
1987     Overflow = false;
1988   return Res;
1989 }
1990 
1991 APInt APInt::umul_ov(const APInt &RHS, bool &Overflow) const {
1992   if (countLeadingZeros() + RHS.countLeadingZeros() + 2 <= BitWidth) {
1993     Overflow = true;
1994     return *this * RHS;
1995   }
1996 
1997   APInt Res = lshr(1) * RHS;
1998   Overflow = Res.isNegative();
1999   Res <<= 1;
2000   if ((*this)[0]) {
2001     Res += RHS;
2002     if (Res.ult(RHS))
2003       Overflow = true;
2004   }
2005   return Res;
2006 }
2007 
2008 APInt APInt::sshl_ov(const APInt &ShAmt, bool &Overflow) const {
2009   Overflow = ShAmt.uge(getBitWidth());
2010   if (Overflow)
2011     return APInt(BitWidth, 0);
2012 
2013   if (isNonNegative()) // Don't allow sign change.
2014     Overflow = ShAmt.uge(countLeadingZeros());
2015   else
2016     Overflow = ShAmt.uge(countLeadingOnes());
2017 
2018   return *this << ShAmt;
2019 }
2020 
2021 APInt APInt::ushl_ov(const APInt &ShAmt, bool &Overflow) const {
2022   Overflow = ShAmt.uge(getBitWidth());
2023   if (Overflow)
2024     return APInt(BitWidth, 0);
2025 
2026   Overflow = ShAmt.ugt(countLeadingZeros());
2027 
2028   return *this << ShAmt;
2029 }
2030 
2031 APInt APInt::sadd_sat(const APInt &RHS) const {
2032   bool Overflow;
2033   APInt Res = sadd_ov(RHS, Overflow);
2034   if (!Overflow)
2035     return Res;
2036 
2037   return isNegative() ? APInt::getSignedMinValue(BitWidth)
2038                       : APInt::getSignedMaxValue(BitWidth);
2039 }
2040 
2041 APInt APInt::uadd_sat(const APInt &RHS) const {
2042   bool Overflow;
2043   APInt Res = uadd_ov(RHS, Overflow);
2044   if (!Overflow)
2045     return Res;
2046 
2047   return APInt::getMaxValue(BitWidth);
2048 }
2049 
2050 APInt APInt::ssub_sat(const APInt &RHS) const {
2051   bool Overflow;
2052   APInt Res = ssub_ov(RHS, Overflow);
2053   if (!Overflow)
2054     return Res;
2055 
2056   return isNegative() ? APInt::getSignedMinValue(BitWidth)
2057                       : APInt::getSignedMaxValue(BitWidth);
2058 }
2059 
2060 APInt APInt::usub_sat(const APInt &RHS) const {
2061   bool Overflow;
2062   APInt Res = usub_ov(RHS, Overflow);
2063   if (!Overflow)
2064     return Res;
2065 
2066   return APInt(BitWidth, 0);
2067 }
2068 
2069 APInt APInt::smul_sat(const APInt &RHS) const {
2070   bool Overflow;
2071   APInt Res = smul_ov(RHS, Overflow);
2072   if (!Overflow)
2073     return Res;
2074 
2075   // The result is negative if one and only one of inputs is negative.
2076   bool ResIsNegative = isNegative() ^ RHS.isNegative();
2077 
2078   return ResIsNegative ? APInt::getSignedMinValue(BitWidth)
2079                        : APInt::getSignedMaxValue(BitWidth);
2080 }
2081 
2082 APInt APInt::umul_sat(const APInt &RHS) const {
2083   bool Overflow;
2084   APInt Res = umul_ov(RHS, Overflow);
2085   if (!Overflow)
2086     return Res;
2087 
2088   return APInt::getMaxValue(BitWidth);
2089 }
2090 
2091 APInt APInt::sshl_sat(const APInt &RHS) const {
2092   bool Overflow;
2093   APInt Res = sshl_ov(RHS, Overflow);
2094   if (!Overflow)
2095     return Res;
2096 
2097   return isNegative() ? APInt::getSignedMinValue(BitWidth)
2098                       : APInt::getSignedMaxValue(BitWidth);
2099 }
2100 
2101 APInt APInt::ushl_sat(const APInt &RHS) const {
2102   bool Overflow;
2103   APInt Res = ushl_ov(RHS, Overflow);
2104   if (!Overflow)
2105     return Res;
2106 
2107   return APInt::getMaxValue(BitWidth);
2108 }
2109 
2110 void APInt::fromString(unsigned numbits, StringRef str, uint8_t radix) {
2111   // Check our assumptions here
2112   assert(!str.empty() && "Invalid string length");
2113   assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 ||
2114           radix == 36) &&
2115          "Radix should be 2, 8, 10, 16, or 36!");
2116 
2117   StringRef::iterator p = str.begin();
2118   size_t slen = str.size();
2119   bool isNeg = *p == '-';
2120   if (*p == '-' || *p == '+') {
2121     p++;
2122     slen--;
2123     assert(slen && "String is only a sign, needs a value.");
2124   }
2125   assert((slen <= numbits || radix != 2) && "Insufficient bit width");
2126   assert(((slen-1)*3 <= numbits || radix != 8) && "Insufficient bit width");
2127   assert(((slen-1)*4 <= numbits || radix != 16) && "Insufficient bit width");
2128   assert((((slen-1)*64)/22 <= numbits || radix != 10) &&
2129          "Insufficient bit width");
2130 
2131   // Allocate memory if needed
2132   if (isSingleWord())
2133     U.VAL = 0;
2134   else
2135     U.pVal = getClearedMemory(getNumWords());
2136 
2137   // Figure out if we can shift instead of multiply
2138   unsigned shift = (radix == 16 ? 4 : radix == 8 ? 3 : radix == 2 ? 1 : 0);
2139 
2140   // Enter digit traversal loop
2141   for (StringRef::iterator e = str.end(); p != e; ++p) {
2142     unsigned digit = getDigit(*p, radix);
2143     assert(digit < radix && "Invalid character in digit string");
2144 
2145     // Shift or multiply the value by the radix
2146     if (slen > 1) {
2147       if (shift)
2148         *this <<= shift;
2149       else
2150         *this *= radix;
2151     }
2152 
2153     // Add in the digit we just interpreted
2154     *this += digit;
2155   }
2156   // If its negative, put it in two's complement form
2157   if (isNeg)
2158     this->negate();
2159 }
2160 
2161 void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix,
2162                      bool Signed, bool formatAsCLiteral) const {
2163   assert((Radix == 10 || Radix == 8 || Radix == 16 || Radix == 2 ||
2164           Radix == 36) &&
2165          "Radix should be 2, 8, 10, 16, or 36!");
2166 
2167   const char *Prefix = "";
2168   if (formatAsCLiteral) {
2169     switch (Radix) {
2170       case 2:
2171         // Binary literals are a non-standard extension added in gcc 4.3:
2172         // http://gcc.gnu.org/onlinedocs/gcc-4.3.0/gcc/Binary-constants.html
2173         Prefix = "0b";
2174         break;
2175       case 8:
2176         Prefix = "0";
2177         break;
2178       case 10:
2179         break; // No prefix
2180       case 16:
2181         Prefix = "0x";
2182         break;
2183       default:
2184         llvm_unreachable("Invalid radix!");
2185     }
2186   }
2187 
2188   // First, check for a zero value and just short circuit the logic below.
2189   if (*this == 0) {
2190     while (*Prefix) {
2191       Str.push_back(*Prefix);
2192       ++Prefix;
2193     };
2194     Str.push_back('0');
2195     return;
2196   }
2197 
2198   static const char Digits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
2199 
2200   if (isSingleWord()) {
2201     char Buffer[65];
2202     char *BufPtr = std::end(Buffer);
2203 
2204     uint64_t N;
2205     if (!Signed) {
2206       N = getZExtValue();
2207     } else {
2208       int64_t I = getSExtValue();
2209       if (I >= 0) {
2210         N = I;
2211       } else {
2212         Str.push_back('-');
2213         N = -(uint64_t)I;
2214       }
2215     }
2216 
2217     while (*Prefix) {
2218       Str.push_back(*Prefix);
2219       ++Prefix;
2220     };
2221 
2222     while (N) {
2223       *--BufPtr = Digits[N % Radix];
2224       N /= Radix;
2225     }
2226     Str.append(BufPtr, std::end(Buffer));
2227     return;
2228   }
2229 
2230   APInt Tmp(*this);
2231 
2232   if (Signed && isNegative()) {
2233     // They want to print the signed version and it is a negative value
2234     // Flip the bits and add one to turn it into the equivalent positive
2235     // value and put a '-' in the result.
2236     Tmp.negate();
2237     Str.push_back('-');
2238   }
2239 
2240   while (*Prefix) {
2241     Str.push_back(*Prefix);
2242     ++Prefix;
2243   };
2244 
2245   // We insert the digits backward, then reverse them to get the right order.
2246   unsigned StartDig = Str.size();
2247 
2248   // For the 2, 8 and 16 bit cases, we can just shift instead of divide
2249   // because the number of bits per digit (1, 3 and 4 respectively) divides
2250   // equally.  We just shift until the value is zero.
2251   if (Radix == 2 || Radix == 8 || Radix == 16) {
2252     // Just shift tmp right for each digit width until it becomes zero
2253     unsigned ShiftAmt = (Radix == 16 ? 4 : (Radix == 8 ? 3 : 1));
2254     unsigned MaskAmt = Radix - 1;
2255 
2256     while (Tmp.getBoolValue()) {
2257       unsigned Digit = unsigned(Tmp.getRawData()[0]) & MaskAmt;
2258       Str.push_back(Digits[Digit]);
2259       Tmp.lshrInPlace(ShiftAmt);
2260     }
2261   } else {
2262     while (Tmp.getBoolValue()) {
2263       uint64_t Digit;
2264       udivrem(Tmp, Radix, Tmp, Digit);
2265       assert(Digit < Radix && "divide failed");
2266       Str.push_back(Digits[Digit]);
2267     }
2268   }
2269 
2270   // Reverse the digits before returning.
2271   std::reverse(Str.begin()+StartDig, Str.end());
2272 }
2273 
2274 /// Returns the APInt as a std::string. Note that this is an inefficient method.
2275 /// It is better to pass in a SmallVector/SmallString to the methods above.
2276 std::string APInt::toString(unsigned Radix = 10, bool Signed = true) const {
2277   SmallString<40> S;
2278   toString(S, Radix, Signed, /* formatAsCLiteral = */false);
2279   return std::string(S.str());
2280 }
2281 
2282 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2283 LLVM_DUMP_METHOD void APInt::dump() const {
2284   SmallString<40> S, U;
2285   this->toStringUnsigned(U);
2286   this->toStringSigned(S);
2287   dbgs() << "APInt(" << BitWidth << "b, "
2288          << U << "u " << S << "s)\n";
2289 }
2290 #endif
2291 
2292 void APInt::print(raw_ostream &OS, bool isSigned) const {
2293   SmallString<40> S;
2294   this->toString(S, 10, isSigned, /* formatAsCLiteral = */false);
2295   OS << S;
2296 }
2297 
2298 // This implements a variety of operations on a representation of
2299 // arbitrary precision, two's-complement, bignum integer values.
2300 
2301 // Assumed by lowHalf, highHalf, partMSB and partLSB.  A fairly safe
2302 // and unrestricting assumption.
2303 static_assert(APInt::APINT_BITS_PER_WORD % 2 == 0,
2304               "Part width must be divisible by 2!");
2305 
2306 /* Some handy functions local to this file.  */
2307 
2308 /* Returns the integer part with the least significant BITS set.
2309    BITS cannot be zero.  */
2310 static inline APInt::WordType lowBitMask(unsigned bits) {
2311   assert(bits != 0 && bits <= APInt::APINT_BITS_PER_WORD);
2312 
2313   return ~(APInt::WordType) 0 >> (APInt::APINT_BITS_PER_WORD - bits);
2314 }
2315 
2316 /* Returns the value of the lower half of PART.  */
2317 static inline APInt::WordType lowHalf(APInt::WordType part) {
2318   return part & lowBitMask(APInt::APINT_BITS_PER_WORD / 2);
2319 }
2320 
2321 /* Returns the value of the upper half of PART.  */
2322 static inline APInt::WordType highHalf(APInt::WordType part) {
2323   return part >> (APInt::APINT_BITS_PER_WORD / 2);
2324 }
2325 
2326 /* Returns the bit number of the most significant set bit of a part.
2327    If the input number has no bits set -1U is returned.  */
2328 static unsigned partMSB(APInt::WordType value) {
2329   return findLastSet(value, ZB_Max);
2330 }
2331 
2332 /* Returns the bit number of the least significant set bit of a
2333    part.  If the input number has no bits set -1U is returned.  */
2334 static unsigned partLSB(APInt::WordType value) {
2335   return findFirstSet(value, ZB_Max);
2336 }
2337 
2338 /* Sets the least significant part of a bignum to the input value, and
2339    zeroes out higher parts.  */
2340 void APInt::tcSet(WordType *dst, WordType part, unsigned parts) {
2341   assert(parts > 0);
2342 
2343   dst[0] = part;
2344   for (unsigned i = 1; i < parts; i++)
2345     dst[i] = 0;
2346 }
2347 
2348 /* Assign one bignum to another.  */
2349 void APInt::tcAssign(WordType *dst, const WordType *src, unsigned parts) {
2350   for (unsigned i = 0; i < parts; i++)
2351     dst[i] = src[i];
2352 }
2353 
2354 /* Returns true if a bignum is zero, false otherwise.  */
2355 bool APInt::tcIsZero(const WordType *src, unsigned parts) {
2356   for (unsigned i = 0; i < parts; i++)
2357     if (src[i])
2358       return false;
2359 
2360   return true;
2361 }
2362 
2363 /* Extract the given bit of a bignum; returns 0 or 1.  */
2364 int APInt::tcExtractBit(const WordType *parts, unsigned bit) {
2365   return (parts[whichWord(bit)] & maskBit(bit)) != 0;
2366 }
2367 
2368 /* Set the given bit of a bignum. */
2369 void APInt::tcSetBit(WordType *parts, unsigned bit) {
2370   parts[whichWord(bit)] |= maskBit(bit);
2371 }
2372 
2373 /* Clears the given bit of a bignum. */
2374 void APInt::tcClearBit(WordType *parts, unsigned bit) {
2375   parts[whichWord(bit)] &= ~maskBit(bit);
2376 }
2377 
2378 /* Returns the bit number of the least significant set bit of a
2379    number.  If the input number has no bits set -1U is returned.  */
2380 unsigned APInt::tcLSB(const WordType *parts, unsigned n) {
2381   for (unsigned i = 0; i < n; i++) {
2382     if (parts[i] != 0) {
2383       unsigned lsb = partLSB(parts[i]);
2384 
2385       return lsb + i * APINT_BITS_PER_WORD;
2386     }
2387   }
2388 
2389   return -1U;
2390 }
2391 
2392 /* Returns the bit number of the most significant set bit of a number.
2393    If the input number has no bits set -1U is returned.  */
2394 unsigned APInt::tcMSB(const WordType *parts, unsigned n) {
2395   do {
2396     --n;
2397 
2398     if (parts[n] != 0) {
2399       unsigned msb = partMSB(parts[n]);
2400 
2401       return msb + n * APINT_BITS_PER_WORD;
2402     }
2403   } while (n);
2404 
2405   return -1U;
2406 }
2407 
2408 /* Copy the bit vector of width srcBITS from SRC, starting at bit
2409    srcLSB, to DST, of dstCOUNT parts, such that the bit srcLSB becomes
2410    the least significant bit of DST.  All high bits above srcBITS in
2411    DST are zero-filled.  */
2412 void
2413 APInt::tcExtract(WordType *dst, unsigned dstCount, const WordType *src,
2414                  unsigned srcBits, unsigned srcLSB) {
2415   unsigned dstParts = (srcBits + APINT_BITS_PER_WORD - 1) / APINT_BITS_PER_WORD;
2416   assert(dstParts <= dstCount);
2417 
2418   unsigned firstSrcPart = srcLSB / APINT_BITS_PER_WORD;
2419   tcAssign (dst, src + firstSrcPart, dstParts);
2420 
2421   unsigned shift = srcLSB % APINT_BITS_PER_WORD;
2422   tcShiftRight (dst, dstParts, shift);
2423 
2424   /* We now have (dstParts * APINT_BITS_PER_WORD - shift) bits from SRC
2425      in DST.  If this is less that srcBits, append the rest, else
2426      clear the high bits.  */
2427   unsigned n = dstParts * APINT_BITS_PER_WORD - shift;
2428   if (n < srcBits) {
2429     WordType mask = lowBitMask (srcBits - n);
2430     dst[dstParts - 1] |= ((src[firstSrcPart + dstParts] & mask)
2431                           << n % APINT_BITS_PER_WORD);
2432   } else if (n > srcBits) {
2433     if (srcBits % APINT_BITS_PER_WORD)
2434       dst[dstParts - 1] &= lowBitMask (srcBits % APINT_BITS_PER_WORD);
2435   }
2436 
2437   /* Clear high parts.  */
2438   while (dstParts < dstCount)
2439     dst[dstParts++] = 0;
2440 }
2441 
2442 /* DST += RHS + C where C is zero or one.  Returns the carry flag.  */
2443 APInt::WordType APInt::tcAdd(WordType *dst, const WordType *rhs,
2444                              WordType c, unsigned parts) {
2445   assert(c <= 1);
2446 
2447   for (unsigned i = 0; i < parts; i++) {
2448     WordType l = dst[i];
2449     if (c) {
2450       dst[i] += rhs[i] + 1;
2451       c = (dst[i] <= l);
2452     } else {
2453       dst[i] += rhs[i];
2454       c = (dst[i] < l);
2455     }
2456   }
2457 
2458   return c;
2459 }
2460 
2461 /// This function adds a single "word" integer, src, to the multiple
2462 /// "word" integer array, dst[]. dst[] is modified to reflect the addition and
2463 /// 1 is returned if there is a carry out, otherwise 0 is returned.
2464 /// @returns the carry of the addition.
2465 APInt::WordType APInt::tcAddPart(WordType *dst, WordType src,
2466                                  unsigned parts) {
2467   for (unsigned i = 0; i < parts; ++i) {
2468     dst[i] += src;
2469     if (dst[i] >= src)
2470       return 0; // No need to carry so exit early.
2471     src = 1; // Carry one to next digit.
2472   }
2473 
2474   return 1;
2475 }
2476 
2477 /* DST -= RHS + C where C is zero or one.  Returns the carry flag.  */
2478 APInt::WordType APInt::tcSubtract(WordType *dst, const WordType *rhs,
2479                                   WordType c, unsigned parts) {
2480   assert(c <= 1);
2481 
2482   for (unsigned i = 0; i < parts; i++) {
2483     WordType l = dst[i];
2484     if (c) {
2485       dst[i] -= rhs[i] + 1;
2486       c = (dst[i] >= l);
2487     } else {
2488       dst[i] -= rhs[i];
2489       c = (dst[i] > l);
2490     }
2491   }
2492 
2493   return c;
2494 }
2495 
2496 /// This function subtracts a single "word" (64-bit word), src, from
2497 /// the multi-word integer array, dst[], propagating the borrowed 1 value until
2498 /// no further borrowing is needed or it runs out of "words" in dst.  The result
2499 /// is 1 if "borrowing" exhausted the digits in dst, or 0 if dst was not
2500 /// exhausted. In other words, if src > dst then this function returns 1,
2501 /// otherwise 0.
2502 /// @returns the borrow out of the subtraction
2503 APInt::WordType APInt::tcSubtractPart(WordType *dst, WordType src,
2504                                       unsigned parts) {
2505   for (unsigned i = 0; i < parts; ++i) {
2506     WordType Dst = dst[i];
2507     dst[i] -= src;
2508     if (src <= Dst)
2509       return 0; // No need to borrow so exit early.
2510     src = 1; // We have to "borrow 1" from next "word"
2511   }
2512 
2513   return 1;
2514 }
2515 
2516 /* Negate a bignum in-place.  */
2517 void APInt::tcNegate(WordType *dst, unsigned parts) {
2518   tcComplement(dst, parts);
2519   tcIncrement(dst, parts);
2520 }
2521 
2522 /*  DST += SRC * MULTIPLIER + CARRY   if add is true
2523     DST  = SRC * MULTIPLIER + CARRY   if add is false
2524 
2525     Requires 0 <= DSTPARTS <= SRCPARTS + 1.  If DST overlaps SRC
2526     they must start at the same point, i.e. DST == SRC.
2527 
2528     If DSTPARTS == SRCPARTS + 1 no overflow occurs and zero is
2529     returned.  Otherwise DST is filled with the least significant
2530     DSTPARTS parts of the result, and if all of the omitted higher
2531     parts were zero return zero, otherwise overflow occurred and
2532     return one.  */
2533 int APInt::tcMultiplyPart(WordType *dst, const WordType *src,
2534                           WordType multiplier, WordType carry,
2535                           unsigned srcParts, unsigned dstParts,
2536                           bool add) {
2537   /* Otherwise our writes of DST kill our later reads of SRC.  */
2538   assert(dst <= src || dst >= src + srcParts);
2539   assert(dstParts <= srcParts + 1);
2540 
2541   /* N loops; minimum of dstParts and srcParts.  */
2542   unsigned n = std::min(dstParts, srcParts);
2543 
2544   for (unsigned i = 0; i < n; i++) {
2545     WordType low, mid, high, srcPart;
2546 
2547       /* [ LOW, HIGH ] = MULTIPLIER * SRC[i] + DST[i] + CARRY.
2548 
2549          This cannot overflow, because
2550 
2551          (n - 1) * (n - 1) + 2 (n - 1) = (n - 1) * (n + 1)
2552 
2553          which is less than n^2.  */
2554 
2555     srcPart = src[i];
2556 
2557     if (multiplier == 0 || srcPart == 0) {
2558       low = carry;
2559       high = 0;
2560     } else {
2561       low = lowHalf(srcPart) * lowHalf(multiplier);
2562       high = highHalf(srcPart) * highHalf(multiplier);
2563 
2564       mid = lowHalf(srcPart) * highHalf(multiplier);
2565       high += highHalf(mid);
2566       mid <<= APINT_BITS_PER_WORD / 2;
2567       if (low + mid < low)
2568         high++;
2569       low += mid;
2570 
2571       mid = highHalf(srcPart) * lowHalf(multiplier);
2572       high += highHalf(mid);
2573       mid <<= APINT_BITS_PER_WORD / 2;
2574       if (low + mid < low)
2575         high++;
2576       low += mid;
2577 
2578       /* Now add carry.  */
2579       if (low + carry < low)
2580         high++;
2581       low += carry;
2582     }
2583 
2584     if (add) {
2585       /* And now DST[i], and store the new low part there.  */
2586       if (low + dst[i] < low)
2587         high++;
2588       dst[i] += low;
2589     } else
2590       dst[i] = low;
2591 
2592     carry = high;
2593   }
2594 
2595   if (srcParts < dstParts) {
2596     /* Full multiplication, there is no overflow.  */
2597     assert(srcParts + 1 == dstParts);
2598     dst[srcParts] = carry;
2599     return 0;
2600   }
2601 
2602   /* We overflowed if there is carry.  */
2603   if (carry)
2604     return 1;
2605 
2606   /* We would overflow if any significant unwritten parts would be
2607      non-zero.  This is true if any remaining src parts are non-zero
2608      and the multiplier is non-zero.  */
2609   if (multiplier)
2610     for (unsigned i = dstParts; i < srcParts; i++)
2611       if (src[i])
2612         return 1;
2613 
2614   /* We fitted in the narrow destination.  */
2615   return 0;
2616 }
2617 
2618 /* DST = LHS * RHS, where DST has the same width as the operands and
2619    is filled with the least significant parts of the result.  Returns
2620    one if overflow occurred, otherwise zero.  DST must be disjoint
2621    from both operands.  */
2622 int APInt::tcMultiply(WordType *dst, const WordType *lhs,
2623                       const WordType *rhs, unsigned parts) {
2624   assert(dst != lhs && dst != rhs);
2625 
2626   int overflow = 0;
2627   tcSet(dst, 0, parts);
2628 
2629   for (unsigned i = 0; i < parts; i++)
2630     overflow |= tcMultiplyPart(&dst[i], lhs, rhs[i], 0, parts,
2631                                parts - i, true);
2632 
2633   return overflow;
2634 }
2635 
2636 /// DST = LHS * RHS, where DST has width the sum of the widths of the
2637 /// operands. No overflow occurs. DST must be disjoint from both operands.
2638 void APInt::tcFullMultiply(WordType *dst, const WordType *lhs,
2639                            const WordType *rhs, unsigned lhsParts,
2640                            unsigned rhsParts) {
2641   /* Put the narrower number on the LHS for less loops below.  */
2642   if (lhsParts > rhsParts)
2643     return tcFullMultiply (dst, rhs, lhs, rhsParts, lhsParts);
2644 
2645   assert(dst != lhs && dst != rhs);
2646 
2647   tcSet(dst, 0, rhsParts);
2648 
2649   for (unsigned i = 0; i < lhsParts; i++)
2650     tcMultiplyPart(&dst[i], rhs, lhs[i], 0, rhsParts, rhsParts + 1, true);
2651 }
2652 
2653 /* If RHS is zero LHS and REMAINDER are left unchanged, return one.
2654    Otherwise set LHS to LHS / RHS with the fractional part discarded,
2655    set REMAINDER to the remainder, return zero.  i.e.
2656 
2657    OLD_LHS = RHS * LHS + REMAINDER
2658 
2659    SCRATCH is a bignum of the same size as the operands and result for
2660    use by the routine; its contents need not be initialized and are
2661    destroyed.  LHS, REMAINDER and SCRATCH must be distinct.
2662 */
2663 int APInt::tcDivide(WordType *lhs, const WordType *rhs,
2664                     WordType *remainder, WordType *srhs,
2665                     unsigned parts) {
2666   assert(lhs != remainder && lhs != srhs && remainder != srhs);
2667 
2668   unsigned shiftCount = tcMSB(rhs, parts) + 1;
2669   if (shiftCount == 0)
2670     return true;
2671 
2672   shiftCount = parts * APINT_BITS_PER_WORD - shiftCount;
2673   unsigned n = shiftCount / APINT_BITS_PER_WORD;
2674   WordType mask = (WordType) 1 << (shiftCount % APINT_BITS_PER_WORD);
2675 
2676   tcAssign(srhs, rhs, parts);
2677   tcShiftLeft(srhs, parts, shiftCount);
2678   tcAssign(remainder, lhs, parts);
2679   tcSet(lhs, 0, parts);
2680 
2681   /* Loop, subtracting SRHS if REMAINDER is greater and adding that to
2682      the total.  */
2683   for (;;) {
2684     int compare = tcCompare(remainder, srhs, parts);
2685     if (compare >= 0) {
2686       tcSubtract(remainder, srhs, 0, parts);
2687       lhs[n] |= mask;
2688     }
2689 
2690     if (shiftCount == 0)
2691       break;
2692     shiftCount--;
2693     tcShiftRight(srhs, parts, 1);
2694     if ((mask >>= 1) == 0) {
2695       mask = (WordType) 1 << (APINT_BITS_PER_WORD - 1);
2696       n--;
2697     }
2698   }
2699 
2700   return false;
2701 }
2702 
2703 /// Shift a bignum left Cound bits in-place. Shifted in bits are zero. There are
2704 /// no restrictions on Count.
2705 void APInt::tcShiftLeft(WordType *Dst, unsigned Words, unsigned Count) {
2706   // Don't bother performing a no-op shift.
2707   if (!Count)
2708     return;
2709 
2710   // WordShift is the inter-part shift; BitShift is the intra-part shift.
2711   unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words);
2712   unsigned BitShift = Count % APINT_BITS_PER_WORD;
2713 
2714   // Fastpath for moving by whole words.
2715   if (BitShift == 0) {
2716     std::memmove(Dst + WordShift, Dst, (Words - WordShift) * APINT_WORD_SIZE);
2717   } else {
2718     while (Words-- > WordShift) {
2719       Dst[Words] = Dst[Words - WordShift] << BitShift;
2720       if (Words > WordShift)
2721         Dst[Words] |=
2722           Dst[Words - WordShift - 1] >> (APINT_BITS_PER_WORD - BitShift);
2723     }
2724   }
2725 
2726   // Fill in the remainder with 0s.
2727   std::memset(Dst, 0, WordShift * APINT_WORD_SIZE);
2728 }
2729 
2730 /// Shift a bignum right Count bits in-place. Shifted in bits are zero. There
2731 /// are no restrictions on Count.
2732 void APInt::tcShiftRight(WordType *Dst, unsigned Words, unsigned Count) {
2733   // Don't bother performing a no-op shift.
2734   if (!Count)
2735     return;
2736 
2737   // WordShift is the inter-part shift; BitShift is the intra-part shift.
2738   unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words);
2739   unsigned BitShift = Count % APINT_BITS_PER_WORD;
2740 
2741   unsigned WordsToMove = Words - WordShift;
2742   // Fastpath for moving by whole words.
2743   if (BitShift == 0) {
2744     std::memmove(Dst, Dst + WordShift, WordsToMove * APINT_WORD_SIZE);
2745   } else {
2746     for (unsigned i = 0; i != WordsToMove; ++i) {
2747       Dst[i] = Dst[i + WordShift] >> BitShift;
2748       if (i + 1 != WordsToMove)
2749         Dst[i] |= Dst[i + WordShift + 1] << (APINT_BITS_PER_WORD - BitShift);
2750     }
2751   }
2752 
2753   // Fill in the remainder with 0s.
2754   std::memset(Dst + WordsToMove, 0, WordShift * APINT_WORD_SIZE);
2755 }
2756 
2757 /* Bitwise and of two bignums.  */
2758 void APInt::tcAnd(WordType *dst, const WordType *rhs, unsigned parts) {
2759   for (unsigned i = 0; i < parts; i++)
2760     dst[i] &= rhs[i];
2761 }
2762 
2763 /* Bitwise inclusive or of two bignums.  */
2764 void APInt::tcOr(WordType *dst, const WordType *rhs, unsigned parts) {
2765   for (unsigned i = 0; i < parts; i++)
2766     dst[i] |= rhs[i];
2767 }
2768 
2769 /* Bitwise exclusive or of two bignums.  */
2770 void APInt::tcXor(WordType *dst, const WordType *rhs, unsigned parts) {
2771   for (unsigned i = 0; i < parts; i++)
2772     dst[i] ^= rhs[i];
2773 }
2774 
2775 /* Complement a bignum in-place.  */
2776 void APInt::tcComplement(WordType *dst, unsigned parts) {
2777   for (unsigned i = 0; i < parts; i++)
2778     dst[i] = ~dst[i];
2779 }
2780 
2781 /* Comparison (unsigned) of two bignums.  */
2782 int APInt::tcCompare(const WordType *lhs, const WordType *rhs,
2783                      unsigned parts) {
2784   while (parts) {
2785     parts--;
2786     if (lhs[parts] != rhs[parts])
2787       return (lhs[parts] > rhs[parts]) ? 1 : -1;
2788   }
2789 
2790   return 0;
2791 }
2792 
2793 /* Set the least significant BITS bits of a bignum, clear the
2794    rest.  */
2795 void APInt::tcSetLeastSignificantBits(WordType *dst, unsigned parts,
2796                                       unsigned bits) {
2797   unsigned i = 0;
2798   while (bits > APINT_BITS_PER_WORD) {
2799     dst[i++] = ~(WordType) 0;
2800     bits -= APINT_BITS_PER_WORD;
2801   }
2802 
2803   if (bits)
2804     dst[i++] = ~(WordType) 0 >> (APINT_BITS_PER_WORD - bits);
2805 
2806   while (i < parts)
2807     dst[i++] = 0;
2808 }
2809 
2810 APInt llvm::APIntOps::RoundingUDiv(const APInt &A, const APInt &B,
2811                                    APInt::Rounding RM) {
2812   // Currently udivrem always rounds down.
2813   switch (RM) {
2814   case APInt::Rounding::DOWN:
2815   case APInt::Rounding::TOWARD_ZERO:
2816     return A.udiv(B);
2817   case APInt::Rounding::UP: {
2818     APInt Quo, Rem;
2819     APInt::udivrem(A, B, Quo, Rem);
2820     if (Rem == 0)
2821       return Quo;
2822     return Quo + 1;
2823   }
2824   }
2825   llvm_unreachable("Unknown APInt::Rounding enum");
2826 }
2827 
2828 APInt llvm::APIntOps::RoundingSDiv(const APInt &A, const APInt &B,
2829                                    APInt::Rounding RM) {
2830   switch (RM) {
2831   case APInt::Rounding::DOWN:
2832   case APInt::Rounding::UP: {
2833     APInt Quo, Rem;
2834     APInt::sdivrem(A, B, Quo, Rem);
2835     if (Rem == 0)
2836       return Quo;
2837     // This algorithm deals with arbitrary rounding mode used by sdivrem.
2838     // We want to check whether the non-integer part of the mathematical value
2839     // is negative or not. If the non-integer part is negative, we need to round
2840     // down from Quo; otherwise, if it's positive or 0, we return Quo, as it's
2841     // already rounded down.
2842     if (RM == APInt::Rounding::DOWN) {
2843       if (Rem.isNegative() != B.isNegative())
2844         return Quo - 1;
2845       return Quo;
2846     }
2847     if (Rem.isNegative() != B.isNegative())
2848       return Quo;
2849     return Quo + 1;
2850   }
2851   // Currently sdiv rounds towards zero.
2852   case APInt::Rounding::TOWARD_ZERO:
2853     return A.sdiv(B);
2854   }
2855   llvm_unreachable("Unknown APInt::Rounding enum");
2856 }
2857 
2858 Optional<APInt>
2859 llvm::APIntOps::SolveQuadraticEquationWrap(APInt A, APInt B, APInt C,
2860                                            unsigned RangeWidth) {
2861   unsigned CoeffWidth = A.getBitWidth();
2862   assert(CoeffWidth == B.getBitWidth() && CoeffWidth == C.getBitWidth());
2863   assert(RangeWidth <= CoeffWidth &&
2864          "Value range width should be less than coefficient width");
2865   assert(RangeWidth > 1 && "Value range bit width should be > 1");
2866 
2867   LLVM_DEBUG(dbgs() << __func__ << ": solving " << A << "x^2 + " << B
2868                     << "x + " << C << ", rw:" << RangeWidth << '\n');
2869 
2870   // Identify 0 as a (non)solution immediately.
2871   if (C.sextOrTrunc(RangeWidth).isNullValue() ) {
2872     LLVM_DEBUG(dbgs() << __func__ << ": zero solution\n");
2873     return APInt(CoeffWidth, 0);
2874   }
2875 
2876   // The result of APInt arithmetic has the same bit width as the operands,
2877   // so it can actually lose high bits. A product of two n-bit integers needs
2878   // 2n-1 bits to represent the full value.
2879   // The operation done below (on quadratic coefficients) that can produce
2880   // the largest value is the evaluation of the equation during bisection,
2881   // which needs 3 times the bitwidth of the coefficient, so the total number
2882   // of required bits is 3n.
2883   //
2884   // The purpose of this extension is to simulate the set Z of all integers,
2885   // where n+1 > n for all n in Z. In Z it makes sense to talk about positive
2886   // and negative numbers (not so much in a modulo arithmetic). The method
2887   // used to solve the equation is based on the standard formula for real
2888   // numbers, and uses the concepts of "positive" and "negative" with their
2889   // usual meanings.
2890   CoeffWidth *= 3;
2891   A = A.sext(CoeffWidth);
2892   B = B.sext(CoeffWidth);
2893   C = C.sext(CoeffWidth);
2894 
2895   // Make A > 0 for simplicity. Negate cannot overflow at this point because
2896   // the bit width has increased.
2897   if (A.isNegative()) {
2898     A.negate();
2899     B.negate();
2900     C.negate();
2901   }
2902 
2903   // Solving an equation q(x) = 0 with coefficients in modular arithmetic
2904   // is really solving a set of equations q(x) = kR for k = 0, 1, 2, ...,
2905   // and R = 2^BitWidth.
2906   // Since we're trying not only to find exact solutions, but also values
2907   // that "wrap around", such a set will always have a solution, i.e. an x
2908   // that satisfies at least one of the equations, or such that |q(x)|
2909   // exceeds kR, while |q(x-1)| for the same k does not.
2910   //
2911   // We need to find a value k, such that Ax^2 + Bx + C = kR will have a
2912   // positive solution n (in the above sense), and also such that the n
2913   // will be the least among all solutions corresponding to k = 0, 1, ...
2914   // (more precisely, the least element in the set
2915   //   { n(k) | k is such that a solution n(k) exists }).
2916   //
2917   // Consider the parabola (over real numbers) that corresponds to the
2918   // quadratic equation. Since A > 0, the arms of the parabola will point
2919   // up. Picking different values of k will shift it up and down by R.
2920   //
2921   // We want to shift the parabola in such a way as to reduce the problem
2922   // of solving q(x) = kR to solving shifted_q(x) = 0.
2923   // (The interesting solutions are the ceilings of the real number
2924   // solutions.)
2925   APInt R = APInt::getOneBitSet(CoeffWidth, RangeWidth);
2926   APInt TwoA = 2 * A;
2927   APInt SqrB = B * B;
2928   bool PickLow;
2929 
2930   auto RoundUp = [] (const APInt &V, const APInt &A) -> APInt {
2931     assert(A.isStrictlyPositive());
2932     APInt T = V.abs().urem(A);
2933     if (T.isNullValue())
2934       return V;
2935     return V.isNegative() ? V+T : V+(A-T);
2936   };
2937 
2938   // The vertex of the parabola is at -B/2A, but since A > 0, it's negative
2939   // iff B is positive.
2940   if (B.isNonNegative()) {
2941     // If B >= 0, the vertex it at a negative location (or at 0), so in
2942     // order to have a non-negative solution we need to pick k that makes
2943     // C-kR negative. To satisfy all the requirements for the solution
2944     // that we are looking for, it needs to be closest to 0 of all k.
2945     C = C.srem(R);
2946     if (C.isStrictlyPositive())
2947       C -= R;
2948     // Pick the greater solution.
2949     PickLow = false;
2950   } else {
2951     // If B < 0, the vertex is at a positive location. For any solution
2952     // to exist, the discriminant must be non-negative. This means that
2953     // C-kR <= B^2/4A is a necessary condition for k, i.e. there is a
2954     // lower bound on values of k: kR >= C - B^2/4A.
2955     APInt LowkR = C - SqrB.udiv(2*TwoA); // udiv because all values > 0.
2956     // Round LowkR up (towards +inf) to the nearest kR.
2957     LowkR = RoundUp(LowkR, R);
2958 
2959     // If there exists k meeting the condition above, and such that
2960     // C-kR > 0, there will be two positive real number solutions of
2961     // q(x) = kR. Out of all such values of k, pick the one that makes
2962     // C-kR closest to 0, (i.e. pick maximum k such that C-kR > 0).
2963     // In other words, find maximum k such that LowkR <= kR < C.
2964     if (C.sgt(LowkR)) {
2965       // If LowkR < C, then such a k is guaranteed to exist because
2966       // LowkR itself is a multiple of R.
2967       C -= -RoundUp(-C, R);      // C = C - RoundDown(C, R)
2968       // Pick the smaller solution.
2969       PickLow = true;
2970     } else {
2971       // If C-kR < 0 for all potential k's, it means that one solution
2972       // will be negative, while the other will be positive. The positive
2973       // solution will shift towards 0 if the parabola is moved up.
2974       // Pick the kR closest to the lower bound (i.e. make C-kR closest
2975       // to 0, or in other words, out of all parabolas that have solutions,
2976       // pick the one that is the farthest "up").
2977       // Since LowkR is itself a multiple of R, simply take C-LowkR.
2978       C -= LowkR;
2979       // Pick the greater solution.
2980       PickLow = false;
2981     }
2982   }
2983 
2984   LLVM_DEBUG(dbgs() << __func__ << ": updated coefficients " << A << "x^2 + "
2985                     << B << "x + " << C << ", rw:" << RangeWidth << '\n');
2986 
2987   APInt D = SqrB - 4*A*C;
2988   assert(D.isNonNegative() && "Negative discriminant");
2989   APInt SQ = D.sqrt();
2990 
2991   APInt Q = SQ * SQ;
2992   bool InexactSQ = Q != D;
2993   // The calculated SQ may actually be greater than the exact (non-integer)
2994   // value. If that's the case, decrement SQ to get a value that is lower.
2995   if (Q.sgt(D))
2996     SQ -= 1;
2997 
2998   APInt X;
2999   APInt Rem;
3000 
3001   // SQ is rounded down (i.e SQ * SQ <= D), so the roots may be inexact.
3002   // When using the quadratic formula directly, the calculated low root
3003   // may be greater than the exact one, since we would be subtracting SQ.
3004   // To make sure that the calculated root is not greater than the exact
3005   // one, subtract SQ+1 when calculating the low root (for inexact value
3006   // of SQ).
3007   if (PickLow)
3008     APInt::sdivrem(-B - (SQ+InexactSQ), TwoA, X, Rem);
3009   else
3010     APInt::sdivrem(-B + SQ, TwoA, X, Rem);
3011 
3012   // The updated coefficients should be such that the (exact) solution is
3013   // positive. Since APInt division rounds towards 0, the calculated one
3014   // can be 0, but cannot be negative.
3015   assert(X.isNonNegative() && "Solution should be non-negative");
3016 
3017   if (!InexactSQ && Rem.isNullValue()) {
3018     LLVM_DEBUG(dbgs() << __func__ << ": solution (root): " << X << '\n');
3019     return X;
3020   }
3021 
3022   assert((SQ*SQ).sle(D) && "SQ = |_sqrt(D)_|, so SQ*SQ <= D");
3023   // The exact value of the square root of D should be between SQ and SQ+1.
3024   // This implies that the solution should be between that corresponding to
3025   // SQ (i.e. X) and that corresponding to SQ+1.
3026   //
3027   // The calculated X cannot be greater than the exact (real) solution.
3028   // Actually it must be strictly less than the exact solution, while
3029   // X+1 will be greater than or equal to it.
3030 
3031   APInt VX = (A*X + B)*X + C;
3032   APInt VY = VX + TwoA*X + A + B;
3033   bool SignChange = VX.isNegative() != VY.isNegative() ||
3034                     VX.isNullValue() != VY.isNullValue();
3035   // If the sign did not change between X and X+1, X is not a valid solution.
3036   // This could happen when the actual (exact) roots don't have an integer
3037   // between them, so they would both be contained between X and X+1.
3038   if (!SignChange) {
3039     LLVM_DEBUG(dbgs() << __func__ << ": no valid solution\n");
3040     return None;
3041   }
3042 
3043   X += 1;
3044   LLVM_DEBUG(dbgs() << __func__ << ": solution (wrap): " << X << '\n');
3045   return X;
3046 }
3047 
3048 Optional<unsigned>
3049 llvm::APIntOps::GetMostSignificantDifferentBit(const APInt &A, const APInt &B) {
3050   assert(A.getBitWidth() == B.getBitWidth() && "Must have the same bitwidth");
3051   if (A == B)
3052     return llvm::None;
3053   return A.getBitWidth() - ((A ^ B).countLeadingZeros() + 1);
3054 }
3055 
3056 /// StoreIntToMemory - Fills the StoreBytes bytes of memory starting from Dst
3057 /// with the integer held in IntVal.
3058 void llvm::StoreIntToMemory(const APInt &IntVal, uint8_t *Dst,
3059                             unsigned StoreBytes) {
3060   assert((IntVal.getBitWidth()+7)/8 >= StoreBytes && "Integer too small!");
3061   const uint8_t *Src = (const uint8_t *)IntVal.getRawData();
3062 
3063   if (sys::IsLittleEndianHost) {
3064     // Little-endian host - the source is ordered from LSB to MSB.  Order the
3065     // destination from LSB to MSB: Do a straight copy.
3066     memcpy(Dst, Src, StoreBytes);
3067   } else {
3068     // Big-endian host - the source is an array of 64 bit words ordered from
3069     // LSW to MSW.  Each word is ordered from MSB to LSB.  Order the destination
3070     // from MSB to LSB: Reverse the word order, but not the bytes in a word.
3071     while (StoreBytes > sizeof(uint64_t)) {
3072       StoreBytes -= sizeof(uint64_t);
3073       // May not be aligned so use memcpy.
3074       memcpy(Dst + StoreBytes, Src, sizeof(uint64_t));
3075       Src += sizeof(uint64_t);
3076     }
3077 
3078     memcpy(Dst, Src + sizeof(uint64_t) - StoreBytes, StoreBytes);
3079   }
3080 }
3081 
3082 /// LoadIntFromMemory - Loads the integer stored in the LoadBytes bytes starting
3083 /// from Src into IntVal, which is assumed to be wide enough and to hold zero.
3084 void llvm::LoadIntFromMemory(APInt &IntVal, const uint8_t *Src,
3085                              unsigned LoadBytes) {
3086   assert((IntVal.getBitWidth()+7)/8 >= LoadBytes && "Integer too small!");
3087   uint8_t *Dst = reinterpret_cast<uint8_t *>(
3088                    const_cast<uint64_t *>(IntVal.getRawData()));
3089 
3090   if (sys::IsLittleEndianHost)
3091     // Little-endian host - the destination must be ordered from LSB to MSB.
3092     // The source is ordered from LSB to MSB: Do a straight copy.
3093     memcpy(Dst, Src, LoadBytes);
3094   else {
3095     // Big-endian - the destination is an array of 64 bit words ordered from
3096     // LSW to MSW.  Each word must be ordered from MSB to LSB.  The source is
3097     // ordered from MSB to LSB: Reverse the word order, but not the bytes in
3098     // a word.
3099     while (LoadBytes > sizeof(uint64_t)) {
3100       LoadBytes -= sizeof(uint64_t);
3101       // May not be aligned so use memcpy.
3102       memcpy(Dst, Src + LoadBytes, sizeof(uint64_t));
3103       Dst += sizeof(uint64_t);
3104     }
3105 
3106     memcpy(Dst + sizeof(uint64_t) - LoadBytes, Src, LoadBytes);
3107   }
3108 }
3109